1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
34#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/Support/Debug.h"
44#include <optional>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "riscv-lower"
49
50STATISTIC(NumTailCalls, "Number of tail calls");
51
52static cl::opt<unsigned> ExtensionMaxWebSize(
53 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
54 cl::desc("Give the maximum size (in number of nodes) of the web of "
55 "instructions that we will consider for VW expansion"),
56 cl::init(18));
57
58static cl::opt<bool>
59 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
60 cl::desc("Allow the formation of VW_W operations (e.g., "
61 "VWADD_W) with splat constants"),
62 cl::init(false));
63
64static cl::opt<unsigned> NumRepeatedDivisors(
65 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
66 cl::desc("Set the minimum number of repetitions of a divisor to allow "
67 "transformation to multiplications by the reciprocal"),
68 cl::init(2));
69
70static cl::opt<int>
71 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
72 cl::desc("Give the maximum number of instructions that we will "
73 "use for creating a floating-point immediate value"),
74 cl::init(2));
75
76RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
77 const RISCVSubtarget &STI)
78 : TargetLowering(TM), Subtarget(STI) {
79
80 if (Subtarget.isRVE())
81 report_fatal_error("Codegen not yet implemented for RVE");
82
83 RISCVABI::ABI ABI = Subtarget.getTargetABI();
84 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
85
86 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
87 !Subtarget.hasStdExtF()) {
88 errs() << "Hard-float 'f' ABI can't be used for a target that "
89 "doesn't support the F instruction set extension (ignoring "
90 "target-abi)\n";
91 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
92 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
93 !Subtarget.hasStdExtD()) {
94 errs() << "Hard-float 'd' ABI can't be used for a target that "
95 "doesn't support the D instruction set extension (ignoring "
96 "target-abi)\n";
97 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
98 }
99
100 switch (ABI) {
101 default:
102 report_fatal_error("Don't know how to lower this ABI");
103 case RISCVABI::ABI_ILP32:
104 case RISCVABI::ABI_ILP32F:
105 case RISCVABI::ABI_ILP32D:
106 case RISCVABI::ABI_LP64:
107 case RISCVABI::ABI_LP64F:
108 case RISCVABI::ABI_LP64D:
109 break;
110 }
111
112 MVT XLenVT = Subtarget.getXLenVT();
113
114 // Set up the register classes.
115 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
116
117 if (Subtarget.hasStdExtZfhOrZfhmin())
118 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
119 if (Subtarget.hasStdExtZfbfmin())
120 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
121 if (Subtarget.hasStdExtF())
122 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
123 if (Subtarget.hasStdExtD())
124 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
125 if (Subtarget.hasStdExtZhinxOrZhinxmin())
126 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
127 if (Subtarget.hasStdExtZfinx())
128 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
129 if (Subtarget.hasStdExtZdinx()) {
130 if (Subtarget.is64Bit())
131 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
132 else
133 addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
134 }
135
136 static const MVT::SimpleValueType BoolVecVTs[] = {
137 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
138 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
139 static const MVT::SimpleValueType IntVecVTs[] = {
140 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
141 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
142 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
143 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
144 MVT::nxv4i64, MVT::nxv8i64};
145 static const MVT::SimpleValueType F16VecVTs[] = {
146 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
147 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
148 static const MVT::SimpleValueType F32VecVTs[] = {
149 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
150 static const MVT::SimpleValueType F64VecVTs[] = {
151 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
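  // Naming note: MVT::nxv<N><ty> is a scalable vector type with a known
  // minimum of N elements of type <ty>; the actual element count is
  // N * vscale. For example, MVT::nxv4i32 holds at least four i32 elements
  // and grows with the implemented VLEN.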
152
153 if (Subtarget.hasVInstructions()) {
154 auto addRegClassForRVV = [this](MVT VT) {
155 // Disable the smallest fractional LMUL types if ELEN is less than
156 // RVVBitsPerBlock.
157 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
158 if (VT.getVectorMinNumElements() < MinElts)
159 return;
160
161 unsigned Size = VT.getSizeInBits().getKnownMinValue();
162 const TargetRegisterClass *RC;
164 RC = &RISCV::VRRegClass;
165 else if (Size == 2 * RISCV::RVVBitsPerBlock)
166 RC = &RISCV::VRM2RegClass;
167 else if (Size == 4 * RISCV::RVVBitsPerBlock)
168 RC = &RISCV::VRM4RegClass;
169 else if (Size == 8 * RISCV::RVVBitsPerBlock)
170 RC = &RISCV::VRM8RegClass;
171 else
172 llvm_unreachable("Unexpected size");
173
174 addRegisterClass(VT, RC);
175 };
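  // Illustrative mapping (RVVBitsPerBlock is 64): a type whose known-minimum
  // size is one block, e.g. nxv8i8 or nxv2i32, is placed in VR (LMUL=1);
  // nxv4i32 (128 bits) lands in VRM2, nxv8i32 in VRM4 and nxv16i32 in VRM8.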
176
177 for (MVT VT : BoolVecVTs)
178 addRegClassForRVV(VT);
179 for (MVT VT : IntVecVTs) {
180 if (VT.getVectorElementType() == MVT::i64 &&
181 !Subtarget.hasVInstructionsI64())
182 continue;
183 addRegClassForRVV(VT);
184 }
185
186 if (Subtarget.hasVInstructionsF16())
187 for (MVT VT : F16VecVTs)
188 addRegClassForRVV(VT);
189
190 if (Subtarget.hasVInstructionsF32())
191 for (MVT VT : F32VecVTs)
192 addRegClassForRVV(VT);
193
194 if (Subtarget.hasVInstructionsF64())
195 for (MVT VT : F64VecVTs)
196 addRegClassForRVV(VT);
197
198 if (Subtarget.useRVVForFixedLengthVectors()) {
199 auto addRegClassForFixedVectors = [this](MVT VT) {
200 MVT ContainerVT = getContainerForFixedLengthVector(VT);
201 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
202 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
203 addRegisterClass(VT, TRI.getRegClass(RCID));
204 };
206 if (useRVVForFixedLengthVectorVT(VT))
207 addRegClassForFixedVectors(VT);
208
210 if (useRVVForFixedLengthVectorVT(VT))
211 addRegClassForFixedVectors(VT);
212 }
213 }
214
215 // Compute derived properties from the register classes.
216 computeRegisterProperties(STI.getRegisterInfo());
217
218 setStackPointerRegisterToSaveRestore(RISCV::X2);
219
220 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
221 MVT::i1, Promote);
222 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
223 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
224 MVT::i1, Promote);
225
226 // TODO: add all necessary setOperationAction calls.
227 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
228
229 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
230 setOperationAction(ISD::BR_CC, XLenVT, Expand);
231 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
233
240
241 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
242
243 setOperationAction(ISD::VASTART, MVT::Other, Custom);
244 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
245
247
249
250 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
251 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
252
253 if (Subtarget.is64Bit()) {
255
256 setOperationAction(ISD::LOAD, MVT::i32, Custom);
257
259 MVT::i32, Custom);
260
263 MVT::i32, Custom);
264 } else {
266 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
267 nullptr);
268 setLibcallName(RTLIB::MULO_I64, nullptr);
269 }
270
271 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
273 else if (Subtarget.is64Bit())
274 setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
275 else
277
278 if (!Subtarget.hasStdExtM())
280 XLenVT, Expand);
281 else if (Subtarget.is64Bit())
283 {MVT::i8, MVT::i16, MVT::i32}, Custom);
284
287 Expand);
288
290 Custom);
291
292 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
293 if (Subtarget.is64Bit())
295 } else if (Subtarget.hasVendorXTHeadBb()) {
296 if (Subtarget.is64Bit())
299 } else {
301 }
302
303 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
304 // pattern match it directly in isel.
306 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
307 Subtarget.hasVendorXTHeadBb())
308 ? Legal
309 : Expand);
310 // Zbkb can use rev8+brev8 to implement bitreverse.
312 Subtarget.hasStdExtZbkb() ? Custom : Expand);
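  // For instance, a full-width BITREVERSE can be emitted as brev8(rev8(x)):
  // rev8 reverses the byte order and brev8 then reverses the bits within each
  // byte, which together reverse every bit of the register.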
313
314 if (Subtarget.hasStdExtZbb()) {
316 Legal);
317
318 if (Subtarget.is64Bit())
321 MVT::i32, Custom);
322 } else {
324 }
325
326 if (Subtarget.hasVendorXTHeadBb()) {
328
329 // We need the custom lowering to make sure that the resulting sequence
330 // for the 32-bit case is efficient on 64-bit targets.
331 if (Subtarget.is64Bit())
333 }
334
335 if (Subtarget.is64Bit())
337
338 if (!Subtarget.hasVendorXTHeadCondMov())
340
341 static const unsigned FPLegalNodeTypes[] = {
342 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
343 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
348
349 static const ISD::CondCode FPCCToExpand[] = {
353
354 static const unsigned FPOpToExpand[] = {
355 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
356 ISD::FREM};
357
358 static const unsigned FPRndMode[] = {
359 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
360 ISD::FROUNDEVEN};
361
363 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
364
365 if (Subtarget.hasStdExtZfbfmin()) {
366 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
367 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
369 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
370 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
372 }
373
375 if (Subtarget.hasStdExtZfhOrZhinx()) {
376 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
377 setOperationAction(FPRndMode, MVT::f16,
378 Subtarget.hasStdExtZfa() ? Legal : Custom);
381 } else {
382 static const unsigned ZfhminPromoteOps[] = {
383 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
385 ISD::FDIV, ISD::FSQRT, ISD::FABS,
389 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
390 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
391 ISD::FROUNDEVEN, ISD::SELECT};
392
393 setOperationAction(ZfhminPromoteOps, MVT::f16, Promote);
396 MVT::f16, Legal);
397 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
398 // DAGCombiner::visitFP_ROUND probably needs improvements first.
400 }
401
404 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
406 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
407
408 setOperationAction(ISD::FNEARBYINT, MVT::f16,
409 Subtarget.hasStdExtZfa() ? Legal : Promote);
410 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
411 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
412 ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
413 MVT::f16, Promote);
414
415 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
416 // complete support for all operations in LegalizeDAG.
421 MVT::f16, Promote);
422
423 // We need to custom promote this.
424 if (Subtarget.is64Bit())
425 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
426
427 if (!Subtarget.hasStdExtZfa())
428 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
429 }
430
431 if (Subtarget.hasStdExtFOrZfinx()) {
432 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
433 setOperationAction(FPRndMode, MVT::f32,
434 Subtarget.hasStdExtZfa() ? Legal : Custom);
435 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
438 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
439 setOperationAction(FPOpToExpand, MVT::f32, Expand);
440 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
441 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
443 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
444 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
445 Subtarget.isSoftFPABI() ? LibCall : Custom);
446 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
447 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
448
449 if (Subtarget.hasStdExtZfa())
450 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
451 else
452 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
453 }
454
455 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
456 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
457
458 if (Subtarget.hasStdExtDOrZdinx()) {
459 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
460
461 if (Subtarget.hasStdExtZfa()) {
462 setOperationAction(FPRndMode, MVT::f64, Legal);
463 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
464 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
465 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
466 } else {
467 if (Subtarget.is64Bit())
468 setOperationAction(FPRndMode, MVT::f64, Custom);
469
470 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
471 }
472
475 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
478 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
479 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
480 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
481 setOperationAction(FPOpToExpand, MVT::f64, Expand);
482 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
483 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
485 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
486 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
487 Subtarget.isSoftFPABI() ? LibCall : Custom);
488 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
489 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
490 }
491
492 if (Subtarget.is64Bit()) {
495 MVT::i32, Custom);
496 setOperationAction(ISD::LROUND, MVT::i32, Custom);
497 }
498
499 if (Subtarget.hasStdExtFOrZfinx()) {
501 Custom);
502
505 XLenVT, Legal);
506
508 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
509 }
510
513 XLenVT, Custom);
514
516
517 if (Subtarget.is64Bit())
519
520 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
521 // Unfortunately this can't be determined just from the ISA naming string.
522 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
523 Subtarget.is64Bit() ? Legal : Custom);
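  // On RV32 the 64-bit cycle counter is split across the cycle and cycleh
  // CSRs, so the Custom lowering has to emit the usual split-counter read:
  // read cycleh, read cycle, re-read cycleh, and retry if the high half
  // changed in between.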
524
525 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
527 if (Subtarget.is64Bit())
529
530 if (Subtarget.hasStdExtZicbop()) {
531 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
532 }
533
534 if (Subtarget.hasStdExtA()) {
537 } else if (Subtarget.hasForcedAtomics()) {
539 } else {
541 }
542
543 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
544
546
547 if (Subtarget.hasVInstructions()) {
549
550 setOperationAction(ISD::VSCALE, XLenVT, Custom);
551
552 // RVV intrinsics may have illegal operands.
553 // We also need to custom legalize vmv.x.s.
556 {MVT::i8, MVT::i16}, Custom);
557 if (Subtarget.is64Bit())
559 MVT::i32, Custom);
560 else
562 MVT::i64, Custom);
563
565 MVT::Other, Custom);
566
567 static const unsigned IntegerVPOps[] = {
568 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
569 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
570 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
571 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
572 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
573 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
574 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
575 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
576 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
577 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
578 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
579 ISD::VP_ABS};
580
581 static const unsigned FloatingPointVPOps[] = {
582 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
583 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
584 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
585 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
586 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
587 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
588 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
589 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
590 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
591 ISD::VP_FRINT, ISD::VP_FNEARBYINT};
592
593 static const unsigned IntegerVecReduceOps[] = {
594 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
595 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
596 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
597
598 static const unsigned FloatingPointVecReduceOps[] = {
599 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
600 ISD::VECREDUCE_FMAX};
601
602 if (!Subtarget.is64Bit()) {
603 // We must custom-lower certain vXi64 operations on RV32 due to the vector
604 // element type being illegal.
606 MVT::i64, Custom);
607
608 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
609
610 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
611 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
612 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
613 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
614 MVT::i64, Custom);
615 }
616
617 for (MVT VT : BoolVecVTs) {
618 if (!isTypeLegal(VT))
619 continue;
620
622
623 // Mask VTs are custom-expanded into a series of standard nodes
627 VT, Custom);
628
630 Custom);
631
634 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
635 Expand);
636
637 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
638
640 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
641 Custom);
642
644 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
645 Custom);
646
647 // RVV has native int->float & float->int conversions where the
648 // element type sizes are within one power-of-two of each other. Any
649 // wider distances between type sizes have to be lowered as sequences
650 // which progressively narrow the gap in stages.
655 VT, Custom);
657 Custom);
658
659 // Expand all extending loads to types larger than this, and truncating
660 // stores from types larger than this.
662 setTruncStoreAction(OtherVT, VT, Expand);
664 VT, Expand);
665 }
666
667 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
668 ISD::VP_TRUNCATE, ISD::VP_SETCC},
669 VT, Custom);
670
673
675
678 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
679 }
680
681 for (MVT VT : IntVecVTs) {
682 if (!isTypeLegal(VT))
683 continue;
684
687
688 // Vectors implement MULHS/MULHU.
690
691 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
692 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
694
696 Legal);
697
698 setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
699
700 // Custom-lower extensions and truncations from/to mask types.
702 VT, Custom);
703
704 // RVV has native int->float & float->int conversions where the
705 // element type sizes are within one power-of-two of each other. Any
706 // wider distances between type sizes have to be lowered as sequences
707 // which progressively narrow the gap in stages.
712 VT, Custom);
714 Custom);
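  // Rough illustration of the staging above: an i8-element vector converted
  // to an f64-element vector is more than one power of two apart in element
  // size, so it is lowered in steps (e.g. widening the integer side first)
  // such that each individual conversion stays within one power of two.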
715
718
719 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
720 // nodes which truncate by one power of two at a time.
722
723 // Custom-lower insert/extract operations to simplify patterns.
725 Custom);
726
727 // Custom-lower reduction operations to set up the corresponding custom
728 // nodes' operands.
729 setOperationAction(IntegerVecReduceOps, VT, Custom);
730
731 setOperationAction(IntegerVPOps, VT, Custom);
732
733 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
734
735 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
736 VT, Custom);
737
739 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
740 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
741 VT, Custom);
742
745 VT, Custom);
746
749
751
753 setTruncStoreAction(VT, OtherVT, Expand);
755 VT, Expand);
756 }
757
760
761 // Splice
763
764 if (Subtarget.hasStdExtZvbb()) {
766 setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
767 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
768 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
769 VT, Custom);
770 } else {
772 setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
774 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
775 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
776 VT, Expand);
777
778 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
779 // range of f32.
780 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
781 if (isTypeLegal(FloatVT)) {
783 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
784 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
785 VT, Custom);
786 }
787
789 }
790 }
791
792 // Expand various CCs to best match the RVV ISA, which natively supports UNE
793 // but no other unordered comparisons, and supports all ordered comparisons
794 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
795 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
796 // and we pattern-match those back to the "original", swapping operands once
797 // more. This way we catch both operations and both "vf" and "fv" forms with
798 // fewer patterns.
799 static const ISD::CondCode VFPCCToExpand[] = {
803 };
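  // For example, a SETOGT is expanded into a SETOLT with the operands
  // swapped; when one side is a scalar splat, instruction selection can swap
  // the operands once more and use the vf form (e.g. vmfgt.vf), so a small
  // set of patterns covers the vv, vf and fv variants.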
804
805 // Sets common operation actions on RVV floating-point vector types.
806 const auto SetCommonVFPActions = [&](MVT VT) {
808 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
809 // sizes are within one power-of-two of each other. Therefore conversions
810 // between vXf16 and vXf64 must be lowered as sequences which convert via
811 // vXf32.
812 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
813 // Custom-lower insert/extract operations to simplify patterns.
815 Custom);
816 // Expand various condition codes (explained above).
817 setCondCodeAction(VFPCCToExpand, VT, Expand);
818
819 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
820
821 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
822 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
824 VT, Custom);
825
826 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
827
828 // Expand FP operations that need libcalls.
830 setOperationAction(ISD::FPOW, VT, Expand);
831 setOperationAction(ISD::FCOS, VT, Expand);
832 setOperationAction(ISD::FSIN, VT, Expand);
833 setOperationAction(ISD::FSINCOS, VT, Expand);
834 setOperationAction(ISD::FEXP, VT, Expand);
835 setOperationAction(ISD::FEXP2, VT, Expand);
836 setOperationAction(ISD::FLOG, VT, Expand);
837 setOperationAction(ISD::FLOG2, VT, Expand);
838 setOperationAction(ISD::FLOG10, VT, Expand);
839
841
842 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
843
844 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
845 VT, Custom);
846
848 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
849 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
850 VT, Custom);
851
854
857 VT, Custom);
858
861
863
864 setOperationAction(FloatingPointVPOps, VT, Custom);
865
867 Custom);
870 VT, Legal);
875 VT, Custom);
876 };
877
878 // Sets common extload/truncstore actions on RVV floating-point vector
879 // types.
880 const auto SetCommonVFPExtLoadTruncStoreActions =
881 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
882 for (auto SmallVT : SmallerVTs) {
883 setTruncStoreAction(VT, SmallVT, Expand);
884 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
885 }
886 };
887
888 if (Subtarget.hasVInstructionsF16()) {
889 for (MVT VT : F16VecVTs) {
890 if (!isTypeLegal(VT))
891 continue;
892 SetCommonVFPActions(VT);
893 }
894 }
895
896 if (Subtarget.hasVInstructionsF32()) {
897 for (MVT VT : F32VecVTs) {
898 if (!isTypeLegal(VT))
899 continue;
900 SetCommonVFPActions(VT);
901 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
902 }
903 }
904
905 if (Subtarget.hasVInstructionsF64()) {
906 for (MVT VT : F64VecVTs) {
907 if (!isTypeLegal(VT))
908 continue;
909 SetCommonVFPActions(VT);
910 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
911 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
912 }
913 }
914
915 if (Subtarget.useRVVForFixedLengthVectors()) {
917 if (!useRVVForFixedLengthVectorVT(VT))
918 continue;
919
920 // By default everything must be expanded.
921 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
924 setTruncStoreAction(VT, OtherVT, Expand);
926 OtherVT, VT, Expand);
927 }
928
929 // Custom lower fixed vector undefs to scalable vector undefs to avoid
930 // expansion to a build_vector of 0s.
932
933 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
935 Custom);
936
938 Custom);
939
941 VT, Custom);
942
944
945 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
946
948
950
952
953 setOperationAction(ISD::BITCAST, VT, Custom);
954
956 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
957 Custom);
958
960 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
961 Custom);
962
964 {
973 },
974 VT, Custom);
976 Custom);
977
979
980 // Operations below are different between mask vectors and other vectors.
981 if (VT.getVectorElementType() == MVT::i1) {
982 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
984 VT, Custom);
985
986 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
987 ISD::VP_SETCC, ISD::VP_TRUNCATE},
988 VT, Custom);
989 continue;
990 }
991
992 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
993 // it before type legalization for i64 vectors on RV32. It will then be
994 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
995 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
996 // improvements first.
997 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1000 }
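  // Sketch of the RV32 case: splatting a 64-bit scalar into, say, v4i64
  // cannot keep a single i64 operand once types are legalized, so the node
  // becomes SPLAT_VECTOR_PARTS carrying the low and high i32 halves, which
  // the Custom lowering then materializes.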
1001
1003 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1004
1005 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1006 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1007 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1008 ISD::VP_SCATTER},
1009 VT, Custom);
1010
1014 VT, Custom);
1015
1018
1019 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1020 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1022
1025 Custom);
1026
1029
1032
1033 // Custom-lower reduction operations to set up the corresponding custom
1034 // nodes' operands.
1035 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1036 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1037 ISD::VECREDUCE_UMIN},
1038 VT, Custom);
1039
1040 setOperationAction(IntegerVPOps, VT, Custom);
1041
1042 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1043 // range of f32.
1044 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1045 if (isTypeLegal(FloatVT))
1048 Custom);
1049 }
1050
1052 // There are no extending loads or truncating stores.
1053 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1054 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1055 setTruncStoreAction(VT, InnerVT, Expand);
1056 }
1057
1058 if (!useRVVForFixedLengthVectorVT(VT))
1059 continue;
1060
1061 // By default everything must be expanded.
1062 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1064
1065 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1066 // expansion to a build_vector of 0s.
1068
1069 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1071 Custom);
1072
1076 VT, Custom);
1077
1078 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1079 ISD::MGATHER, ISD::MSCATTER},
1080 VT, Custom);
1081
1082 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1083 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1084 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1085 ISD::VP_SCATTER},
1086 VT, Custom);
1087
1089 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1090 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1092 VT, Custom);
1093
1094 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1095
1096 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1097 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1098 VT, Custom);
1099
1100 setCondCodeAction(VFPCCToExpand, VT, Expand);
1101
1105
1106 setOperationAction(ISD::BITCAST, VT, Custom);
1107
1108 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1109
1110 setOperationAction(FloatingPointVPOps, VT, Custom);
1111
1113 Custom);
1120 VT, Custom);
1121 }
1122
1123 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1124 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1125 Custom);
1127 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1128 if (Subtarget.hasStdExtFOrZfinx())
1129 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1130 if (Subtarget.hasStdExtDOrZdinx())
1131 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1132 }
1133 }
1134
1135 if (Subtarget.hasForcedAtomics()) {
1136 // Set atomic rmw/cas operations to expand to force __sync libcalls.
1138 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1139 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1140 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1141 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1142 XLenVT, Expand);
1143 }
1144
1145 if (Subtarget.hasVendorXTHeadMemIdx()) {
1146 for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1147 ++im) {
1148 setIndexedLoadAction(im, MVT::i8, Legal);
1149 setIndexedStoreAction(im, MVT::i8, Legal);
1150 setIndexedLoadAction(im, MVT::i16, Legal);
1151 setIndexedStoreAction(im, MVT::i16, Legal);
1152 setIndexedLoadAction(im, MVT::i32, Legal);
1153 setIndexedStoreAction(im, MVT::i32, Legal);
1154
1155 if (Subtarget.is64Bit()) {
1156 setIndexedLoadAction(im, MVT::i64, Legal);
1157 setIndexedStoreAction(im, MVT::i64, Legal);
1158 }
1159 }
1160 }
1161
1162 // Function alignments.
1163 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1164 setMinFunctionAlignment(FunctionAlignment);
1165 // Set preferred alignments.
1168
1170
1171 // Jumps are expensive, compared to logic
1173
1177 if (Subtarget.is64Bit())
1179
1180 if (Subtarget.hasStdExtFOrZfinx())
1181 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1182
1183 if (Subtarget.hasStdExtZbb())
1185
1186 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1188
1189 if (Subtarget.hasStdExtZbkb())
1193 if (Subtarget.hasStdExtFOrZfinx())
1196 if (Subtarget.hasVInstructions())
1197 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1198 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1199 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1201 if (Subtarget.hasVendorXTHeadMemPair())
1202 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1203 if (Subtarget.useRVVForFixedLengthVectors())
1204 setTargetDAGCombine(ISD::BITCAST);
1205
1206 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1207 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1208
1209 // Disable strict node mutation.
1210 IsStrictFPEnabled = true;
1211}
1212
1213EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1214 LLVMContext &Context,
1215 EVT VT) const {
1216 if (!VT.isVector())
1217 return getPointerTy(DL);
1218 if (Subtarget.hasVInstructions() &&
1219 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1220 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1221 return VT.changeVectorElementTypeToInteger();
1222}
1223
1224MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1225 return Subtarget.getXLenVT();
1226}
1227
1228// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1229bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1230 unsigned VF,
1231 bool IsScalable) const {
1232 if (!Subtarget.hasVInstructions())
1233 return true;
1234
1235 if (!IsScalable)
1236 return true;
1237
1238 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1239 return true;
1240
1241 // Don't allow VF=1 if those types aren't legal.
1242 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
1243 return true;
1244
1245 // VLEN=32 support is incomplete.
1246 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1247 return true;
1248
1249 // The maximum VF is for the smallest element width with LMUL=8.
1250 // VF must be a power of 2.
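  // Spelled out: with RVVBitsPerBlock = 64, the smallest element width
  // (8 bits) gives 64 / 8 = 8 elements per vector register, and LMUL=8 groups
  // eight registers, so the largest VF accepted here is 8 * 8 = 64.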
1251 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1252 return VF > MaxVF || !isPowerOf2_32(VF);
1253}
1254
1255bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1256 const CallInst &I,
1257 MachineFunction &MF,
1258 unsigned Intrinsic) const {
1259 auto &DL = I.getModule()->getDataLayout();
1260
1261 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1262 bool IsUnitStrided) {
1263 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1264 Info.ptrVal = I.getArgOperand(PtrOp);
1265 Type *MemTy;
1266 if (IsStore) {
1267 // Store value is the first operand.
1268 MemTy = I.getArgOperand(0)->getType();
1269 } else {
1270 // Use the return type. If it's a segment load, the return type is a struct.
1271 MemTy = I.getType();
1272 if (MemTy->isStructTy())
1273 MemTy = MemTy->getStructElementType(0);
1274 }
1275 if (!IsUnitStrided)
1276 MemTy = MemTy->getScalarType();
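    // For strided or indexed accesses the elements are not contiguous in
    // memory, so only an element-sized memory type is recorded here; for
    // unit-strided accesses the whole vector type is used as the memory type.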
1277
1278 Info.memVT = getValueType(DL, MemTy);
1279 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1280 Info.size = MemoryLocation::UnknownSize;
1281 Info.flags |=
1282 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1283 return true;
1284 };
1285
1286 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1287 Info.flags |= MachineMemOperand::MONonTemporal;
1288
1290 switch (Intrinsic) {
1291 default:
1292 return false;
1293 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1294 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1295 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1296 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1297 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1298 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1299 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1300 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1301 case Intrinsic::riscv_masked_cmpxchg_i32:
1302 Info.opc = ISD::INTRINSIC_W_CHAIN;
1303 Info.memVT = MVT::i32;
1304 Info.ptrVal = I.getArgOperand(0);
1305 Info.offset = 0;
1306 Info.align = Align(4);
1307 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1308 MachineMemOperand::MOVolatile;
1309 return true;
1310 case Intrinsic::riscv_masked_strided_load:
1311 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1312 /*IsUnitStrided*/ false);
1313 case Intrinsic::riscv_masked_strided_store:
1314 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1315 /*IsUnitStrided*/ false);
1316 case Intrinsic::riscv_seg2_load:
1317 case Intrinsic::riscv_seg3_load:
1318 case Intrinsic::riscv_seg4_load:
1319 case Intrinsic::riscv_seg5_load:
1320 case Intrinsic::riscv_seg6_load:
1321 case Intrinsic::riscv_seg7_load:
1322 case Intrinsic::riscv_seg8_load:
1323 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1324 /*IsUnitStrided*/ false);
1325 case Intrinsic::riscv_seg2_store:
1326 case Intrinsic::riscv_seg3_store:
1327 case Intrinsic::riscv_seg4_store:
1328 case Intrinsic::riscv_seg5_store:
1329 case Intrinsic::riscv_seg6_store:
1330 case Intrinsic::riscv_seg7_store:
1331 case Intrinsic::riscv_seg8_store:
1332 // Operands are (vec, ..., vec, ptr, vl)
1333 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1334 /*IsStore*/ true,
1335 /*IsUnitStrided*/ false);
1336 case Intrinsic::riscv_vle:
1337 case Intrinsic::riscv_vle_mask:
1338 case Intrinsic::riscv_vleff:
1339 case Intrinsic::riscv_vleff_mask:
1340 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1341 /*IsStore*/ false,
1342 /*IsUnitStrided*/ true);
1343 case Intrinsic::riscv_vse:
1344 case Intrinsic::riscv_vse_mask:
1345 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1346 /*IsStore*/ true,
1347 /*IsUnitStrided*/ true);
1348 case Intrinsic::riscv_vlse:
1349 case Intrinsic::riscv_vlse_mask:
1350 case Intrinsic::riscv_vloxei:
1351 case Intrinsic::riscv_vloxei_mask:
1352 case Intrinsic::riscv_vluxei:
1353 case Intrinsic::riscv_vluxei_mask:
1354 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1355 /*IsStore*/ false,
1356 /*IsUnitStrided*/ false);
1357 case Intrinsic::riscv_vsse:
1358 case Intrinsic::riscv_vsse_mask:
1359 case Intrinsic::riscv_vsoxei:
1360 case Intrinsic::riscv_vsoxei_mask:
1361 case Intrinsic::riscv_vsuxei:
1362 case Intrinsic::riscv_vsuxei_mask:
1363 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1364 /*IsStore*/ true,
1365 /*IsUnitStrided*/ false);
1366 case Intrinsic::riscv_vlseg2:
1367 case Intrinsic::riscv_vlseg3:
1368 case Intrinsic::riscv_vlseg4:
1369 case Intrinsic::riscv_vlseg5:
1370 case Intrinsic::riscv_vlseg6:
1371 case Intrinsic::riscv_vlseg7:
1372 case Intrinsic::riscv_vlseg8:
1373 case Intrinsic::riscv_vlseg2ff:
1374 case Intrinsic::riscv_vlseg3ff:
1375 case Intrinsic::riscv_vlseg4ff:
1376 case Intrinsic::riscv_vlseg5ff:
1377 case Intrinsic::riscv_vlseg6ff:
1378 case Intrinsic::riscv_vlseg7ff:
1379 case Intrinsic::riscv_vlseg8ff:
1380 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1381 /*IsStore*/ false,
1382 /*IsUnitStrided*/ false);
1383 case Intrinsic::riscv_vlseg2_mask:
1384 case Intrinsic::riscv_vlseg3_mask:
1385 case Intrinsic::riscv_vlseg4_mask:
1386 case Intrinsic::riscv_vlseg5_mask:
1387 case Intrinsic::riscv_vlseg6_mask:
1388 case Intrinsic::riscv_vlseg7_mask:
1389 case Intrinsic::riscv_vlseg8_mask:
1390 case Intrinsic::riscv_vlseg2ff_mask:
1391 case Intrinsic::riscv_vlseg3ff_mask:
1392 case Intrinsic::riscv_vlseg4ff_mask:
1393 case Intrinsic::riscv_vlseg5ff_mask:
1394 case Intrinsic::riscv_vlseg6ff_mask:
1395 case Intrinsic::riscv_vlseg7ff_mask:
1396 case Intrinsic::riscv_vlseg8ff_mask:
1397 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1398 /*IsStore*/ false,
1399 /*IsUnitStrided*/ false);
1400 case Intrinsic::riscv_vlsseg2:
1401 case Intrinsic::riscv_vlsseg3:
1402 case Intrinsic::riscv_vlsseg4:
1403 case Intrinsic::riscv_vlsseg5:
1404 case Intrinsic::riscv_vlsseg6:
1405 case Intrinsic::riscv_vlsseg7:
1406 case Intrinsic::riscv_vlsseg8:
1407 case Intrinsic::riscv_vloxseg2:
1408 case Intrinsic::riscv_vloxseg3:
1409 case Intrinsic::riscv_vloxseg4:
1410 case Intrinsic::riscv_vloxseg5:
1411 case Intrinsic::riscv_vloxseg6:
1412 case Intrinsic::riscv_vloxseg7:
1413 case Intrinsic::riscv_vloxseg8:
1414 case Intrinsic::riscv_vluxseg2:
1415 case Intrinsic::riscv_vluxseg3:
1416 case Intrinsic::riscv_vluxseg4:
1417 case Intrinsic::riscv_vluxseg5:
1418 case Intrinsic::riscv_vluxseg6:
1419 case Intrinsic::riscv_vluxseg7:
1420 case Intrinsic::riscv_vluxseg8:
1421 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1422 /*IsStore*/ false,
1423 /*IsUnitStrided*/ false);
1424 case Intrinsic::riscv_vlsseg2_mask:
1425 case Intrinsic::riscv_vlsseg3_mask:
1426 case Intrinsic::riscv_vlsseg4_mask:
1427 case Intrinsic::riscv_vlsseg5_mask:
1428 case Intrinsic::riscv_vlsseg6_mask:
1429 case Intrinsic::riscv_vlsseg7_mask:
1430 case Intrinsic::riscv_vlsseg8_mask:
1431 case Intrinsic::riscv_vloxseg2_mask:
1432 case Intrinsic::riscv_vloxseg3_mask:
1433 case Intrinsic::riscv_vloxseg4_mask:
1434 case Intrinsic::riscv_vloxseg5_mask:
1435 case Intrinsic::riscv_vloxseg6_mask:
1436 case Intrinsic::riscv_vloxseg7_mask:
1437 case Intrinsic::riscv_vloxseg8_mask:
1438 case Intrinsic::riscv_vluxseg2_mask:
1439 case Intrinsic::riscv_vluxseg3_mask:
1440 case Intrinsic::riscv_vluxseg4_mask:
1441 case Intrinsic::riscv_vluxseg5_mask:
1442 case Intrinsic::riscv_vluxseg6_mask:
1443 case Intrinsic::riscv_vluxseg7_mask:
1444 case Intrinsic::riscv_vluxseg8_mask:
1445 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1446 /*IsStore*/ false,
1447 /*IsUnitStrided*/ false);
1448 case Intrinsic::riscv_vsseg2:
1449 case Intrinsic::riscv_vsseg3:
1450 case Intrinsic::riscv_vsseg4:
1451 case Intrinsic::riscv_vsseg5:
1452 case Intrinsic::riscv_vsseg6:
1453 case Intrinsic::riscv_vsseg7:
1454 case Intrinsic::riscv_vsseg8:
1455 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1456 /*IsStore*/ true,
1457 /*IsUnitStrided*/ false);
1458 case Intrinsic::riscv_vsseg2_mask:
1459 case Intrinsic::riscv_vsseg3_mask:
1460 case Intrinsic::riscv_vsseg4_mask:
1461 case Intrinsic::riscv_vsseg5_mask:
1462 case Intrinsic::riscv_vsseg6_mask:
1463 case Intrinsic::riscv_vsseg7_mask:
1464 case Intrinsic::riscv_vsseg8_mask:
1465 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1466 /*IsStore*/ true,
1467 /*IsUnitStrided*/ false);
1468 case Intrinsic::riscv_vssseg2:
1469 case Intrinsic::riscv_vssseg3:
1470 case Intrinsic::riscv_vssseg4:
1471 case Intrinsic::riscv_vssseg5:
1472 case Intrinsic::riscv_vssseg6:
1473 case Intrinsic::riscv_vssseg7:
1474 case Intrinsic::riscv_vssseg8:
1475 case Intrinsic::riscv_vsoxseg2:
1476 case Intrinsic::riscv_vsoxseg3:
1477 case Intrinsic::riscv_vsoxseg4:
1478 case Intrinsic::riscv_vsoxseg5:
1479 case Intrinsic::riscv_vsoxseg6:
1480 case Intrinsic::riscv_vsoxseg7:
1481 case Intrinsic::riscv_vsoxseg8:
1482 case Intrinsic::riscv_vsuxseg2:
1483 case Intrinsic::riscv_vsuxseg3:
1484 case Intrinsic::riscv_vsuxseg4:
1485 case Intrinsic::riscv_vsuxseg5:
1486 case Intrinsic::riscv_vsuxseg6:
1487 case Intrinsic::riscv_vsuxseg7:
1488 case Intrinsic::riscv_vsuxseg8:
1489 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1490 /*IsStore*/ true,
1491 /*IsUnitStrided*/ false);
1492 case Intrinsic::riscv_vssseg2_mask:
1493 case Intrinsic::riscv_vssseg3_mask:
1494 case Intrinsic::riscv_vssseg4_mask:
1495 case Intrinsic::riscv_vssseg5_mask:
1496 case Intrinsic::riscv_vssseg6_mask:
1497 case Intrinsic::riscv_vssseg7_mask:
1498 case Intrinsic::riscv_vssseg8_mask:
1499 case Intrinsic::riscv_vsoxseg2_mask:
1500 case Intrinsic::riscv_vsoxseg3_mask:
1501 case Intrinsic::riscv_vsoxseg4_mask:
1502 case Intrinsic::riscv_vsoxseg5_mask:
1503 case Intrinsic::riscv_vsoxseg6_mask:
1504 case Intrinsic::riscv_vsoxseg7_mask:
1505 case Intrinsic::riscv_vsoxseg8_mask:
1506 case Intrinsic::riscv_vsuxseg2_mask:
1507 case Intrinsic::riscv_vsuxseg3_mask:
1508 case Intrinsic::riscv_vsuxseg4_mask:
1509 case Intrinsic::riscv_vsuxseg5_mask:
1510 case Intrinsic::riscv_vsuxseg6_mask:
1511 case Intrinsic::riscv_vsuxseg7_mask:
1512 case Intrinsic::riscv_vsuxseg8_mask:
1513 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1514 /*IsStore*/ true,
1515 /*IsUnitStrided*/ false);
1516 }
1517}
1518
1519bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1520 const AddrMode &AM, Type *Ty,
1521 unsigned AS,
1522 Instruction *I) const {
1523 // No global is ever allowed as a base.
1524 if (AM.BaseGV)
1525 return false;
1526
1527 // RVV instructions only support register addressing.
1528 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1529 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1530
1531 // Require a 12-bit signed offset.
1532 if (!isInt<12>(AM.BaseOffs))
1533 return false;
1534
1535 switch (AM.Scale) {
1536 case 0: // "r+i" or just "i", depending on HasBaseReg.
1537 break;
1538 case 1:
1539 if (!AM.HasBaseReg) // allow "r+i".
1540 break;
1541 return false; // disallow "r+r" or "r+r+i".
1542 default:
1543 return false;
1544 }
1545
1546 return true;
1547}
1548
1549bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1550 return isInt<12>(Imm);
1551}
1552
1553bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1554 return isInt<12>(Imm);
1555}
1556
1557// On RV32, 64-bit integers are split into their high and low parts and held
1558// in two different registers, so the trunc is free since the low register can
1559// just be used.
1560// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1561// isTruncateFree?
1562bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1563 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1564 return false;
1565 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1566 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1567 return (SrcBits == 64 && DestBits == 32);
1568}
1569
1570bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1571 // We consider i64->i32 free on RV64 since we have good selection of W
1572 // instructions that make promoting operations back to i64 free in many cases.
1573 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1574 !DstVT.isInteger())
1575 return false;
1576 unsigned SrcBits = SrcVT.getSizeInBits();
1577 unsigned DestBits = DstVT.getSizeInBits();
1578 return (SrcBits == 64 && DestBits == 32);
1579}
1580
1581bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1582 // Zexts are free if they can be combined with a load.
1583 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1584 // poorly with type legalization of compares preferring sext.
1585 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1586 EVT MemVT = LD->getMemoryVT();
1587 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1588 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1589 LD->getExtensionType() == ISD::ZEXTLOAD))
1590 return true;
1591 }
1592
1593 return TargetLowering::isZExtFree(Val, VT2);
1594}
1595
1596bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1597 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1598}
1599
1600bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1601 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1602}
1603
1604bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1605 return Subtarget.hasStdExtZbb();
1606}
1607
1608bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1609 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
1610}
1611
1612bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1613 const Instruction &AndI) const {
1614 // We expect to be able to match a bit extraction instruction if the Zbs
1615 // extension is supported and the mask is a power of two. However, we
1616 // conservatively return false if the mask would fit in an ANDI instruction,
1617 // on the basis that it's possible the sinking+duplication of the AND in
1618 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1619 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
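  // For example, (X & 1024) == 0 still fits a 12-bit ANDI immediate, so we
  // return false and leave it alone, whereas (X & 65536) == 0 cannot use ANDI
  // and does benefit from a single-bit test (BEXTI or th.tst).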
1620 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1621 return false;
1622 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1623 if (!Mask)
1624 return false;
1625 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1626}
1627
1628bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1629 EVT VT = Y.getValueType();
1630
1631 // FIXME: Support vectors once we have tests.
1632 if (VT.isVector())
1633 return false;
1634
1635 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1636 !isa<ConstantSDNode>(Y);
1637}
1638
1639bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1640 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1641 if (Subtarget.hasStdExtZbs())
1642 return X.getValueType().isScalarInteger();
1643 auto *C = dyn_cast<ConstantSDNode>(Y);
1644 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1645 if (Subtarget.hasVendorXTHeadBs())
1646 return C != nullptr;
1647 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1648 return C && C->getAPIntValue().ule(10);
1649}
1650
1651bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1652 EVT VT) const {
1653 // Only enable for rvv.
1654 if (!VT.isVector() || !Subtarget.hasVInstructions())
1655 return false;
1656
1657 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1658 return false;
1659
1660 return true;
1661}
1662
1663bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1664 Type *Ty) const {
1665 assert(Ty->isIntegerTy());
1666
1667 unsigned BitSize = Ty->getIntegerBitWidth();
1668 if (BitSize > Subtarget.getXLen())
1669 return false;
1670
1671 // Fast path, assume 32-bit immediates are cheap.
1672 int64_t Val = Imm.getSExtValue();
1673 if (isInt<32>(Val))
1674 return true;
1675
1676 // A constant pool entry may be more aligned than the load we're trying to
1677 // replace. If we don't support unaligned scalar mem, prefer the constant
1678 // pool.
1679 // TODO: Can the caller pass down the alignment?
1680 if (!Subtarget.enableUnalignedScalarMem())
1681 return true;
1682
1683 // Prefer to keep the load if it would require many instructions.
1684 // This uses the same threshold we use for constant pools but doesn't
1685 // check useConstantPoolForLargeInts.
1686 // TODO: Should we keep the load only when we're definitely going to emit a
1687 // constant pool?
1688
1689 RISCVMatInt::InstSeq Seq =
1690 RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1691 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1692}
1693
1694bool RISCVTargetLowering::
1695 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1696 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1697 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1698 SelectionDAG &DAG) const {
1699 // One interesting pattern that we'd want to form is 'bit extract':
1700 // ((1 >> Y) & 1) ==/!= 0
1701 // But we also need to be careful not to try to reverse that fold.
1702
1703 // Is this '((1 >> Y) & 1)'?
1704 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1705 return false; // Keep the 'bit extract' pattern.
1706
1707 // Will this be '((1 >> Y) & 1)' after the transform?
1708 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1709 return true; // Do form the 'bit extract' pattern.
1710
1711 // If 'X' is a constant, and we transform, then we will immediately
1712 // try to undo the fold, thus causing endless combine loop.
1713 // So only do the transform if X is not a constant. This matches the default
1714 // implementation of this function.
1715 return !XC;
1716}
1717
1718bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1719 switch (Opcode) {
1720 case Instruction::Add:
1721 case Instruction::Sub:
1722 case Instruction::Mul:
1723 case Instruction::And:
1724 case Instruction::Or:
1725 case Instruction::Xor:
1726 case Instruction::FAdd:
1727 case Instruction::FSub:
1728 case Instruction::FMul:
1729 case Instruction::FDiv:
1730 case Instruction::ICmp:
1731 case Instruction::FCmp:
1732 return true;
1733 case Instruction::Shl:
1734 case Instruction::LShr:
1735 case Instruction::AShr:
1736 case Instruction::UDiv:
1737 case Instruction::SDiv:
1738 case Instruction::URem:
1739 case Instruction::SRem:
1740 return Operand == 1;
1741 default:
1742 return false;
1743 }
1744}
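// Note: the asymmetry above mirrors the RVV instruction forms. The .vx/.vf
// variants take their scalar in the rs1/fs1 slot (the second source), so for
// shifts, divisions and remainders only operand 1 can be folded into a splat,
// while operations that commute or have a reversed form (vrsub.vx,
// vfrsub.vf, vfrdiv.vf) can take a splat on either side.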
1745
1746
1747bool RISCVTargetLowering::canSplatOperand(Instruction *I, unsigned Operand) const {
1748 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1749 return false;
1750
1751 if (canSplatOperand(I->getOpcode(), Operand))
1752 return true;
1753
1754 auto *II = dyn_cast<IntrinsicInst>(I);
1755 if (!II)
1756 return false;
1757
1758 switch (II->getIntrinsicID()) {
1759 case Intrinsic::fma:
1760 case Intrinsic::vp_fma:
1761 return Operand == 0 || Operand == 1;
1762 case Intrinsic::vp_shl:
1763 case Intrinsic::vp_lshr:
1764 case Intrinsic::vp_ashr:
1765 case Intrinsic::vp_udiv:
1766 case Intrinsic::vp_sdiv:
1767 case Intrinsic::vp_urem:
1768 case Intrinsic::vp_srem:
1769 return Operand == 1;
1770 // These intrinsics are commutative.
1771 case Intrinsic::vp_add:
1772 case Intrinsic::vp_mul:
1773 case Intrinsic::vp_and:
1774 case Intrinsic::vp_or:
1775 case Intrinsic::vp_xor:
1776 case Intrinsic::vp_fadd:
1777 case Intrinsic::vp_fmul:
1778 case Intrinsic::vp_icmp:
1779 case Intrinsic::vp_fcmp:
1780 // These intrinsics have 'vr' versions.
1781 case Intrinsic::vp_sub:
1782 case Intrinsic::vp_fsub:
1783 case Intrinsic::vp_fdiv:
1784 return Operand == 0 || Operand == 1;
1785 default:
1786 return false;
1787 }
1788}
1789
1790/// Check if sinking \p I's operands to I's basic block is profitable, because
1791/// the operands can be folded into a target instruction, e.g.
1792/// splats of scalars can fold into vector instructions.
1793bool RISCVTargetLowering::shouldSinkOperands(
1794 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1795 using namespace llvm::PatternMatch;
1796
1797 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1798 return false;
1799
1800 for (auto OpIdx : enumerate(I->operands())) {
1801 if (!canSplatOperand(I, OpIdx.index()))
1802 continue;
1803
1804 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1805 // Make sure we are not already sinking this operand
1806 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1807 continue;
1808
1809 // We are looking for a splat that can be sunk.
1810 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1811 m_Undef(), m_ZeroMask())))
1812 continue;
1813
1814 // Don't sink i1 splats.
1815 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
1816 continue;
1817
1818 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
1819 // and vector registers.
1820 for (Use &U : Op->uses()) {
1821 Instruction *Insn = cast<Instruction>(U.getUser());
1822 if (!canSplatOperand(Insn, U.getOperandNo()))
1823 return false;
1824 }
1825
1826 Ops.push_back(&Op->getOperandUse(0));
1827 Ops.push_back(&OpIdx.value());
1828 }
1829 return true;
1830}
1831
1832bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
1833 unsigned Opc = VecOp.getOpcode();
1834
1835 // Assume target opcodes can't be scalarized.
1836 // TODO - do we have any exceptions?
1837 if (Opc >= ISD::BUILTIN_OP_END)
1838 return false;
1839
1840 // If the vector op is not supported, try to convert to scalar.
1841 EVT VecVT = VecOp.getValueType();
1842 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
1843 return true;
1844
1845 // If the vector op is supported, but the scalar op is not, the transform may
1846 // not be worthwhile.
1847 EVT ScalarVT = VecVT.getScalarType();
1848 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
1849}
1850
1851bool RISCVTargetLowering::isOffsetFoldingLegal(
1852 const GlobalAddressSDNode *GA) const {
1853 // In order to maximise the opportunity for common subexpression elimination,
1854 // keep a separate ADD node for the global address offset instead of folding
1855 // it in the global address node. Later peephole optimisations may choose to
1856 // fold it back in when profitable.
1857 return false;
1858}
1859
1860// Returns 0-31 if the fli instruction is available for the type and this is
1861// legal FP immediate for the type. Returns -1 otherwise.
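// (Roughly speaking, Zfa's fli.h/fli.s/fli.d instructions load one of 32
// preselected constants, and the value returned here is the index of the
// matching table entry, if any.)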
1862int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
1863 if (!Subtarget.hasStdExtZfa())
1864 return -1;
1865
1866 bool IsSupportedVT = false;
1867 if (VT == MVT::f16) {
1868 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
1869 } else if (VT == MVT::f32) {
1870 IsSupportedVT = true;
1871 } else if (VT == MVT::f64) {
1872 assert(Subtarget.hasStdExtD() && "Expect D extension");
1873 IsSupportedVT = true;
1874 }
1875
1876 if (!IsSupportedVT)
1877 return -1;
1878
1879 return RISCVLoadFPImm::getLoadFPImm(Imm);
1880}
1881
1882bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1883 bool ForCodeSize) const {
1884 bool IsLegalVT = false;
1885 if (VT == MVT::f16)
1886 IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
1887 else if (VT == MVT::f32)
1888 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
1889 else if (VT == MVT::f64)
1890 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
1891
1892 if (!IsLegalVT)
1893 return false;
1894
1895 if (getLegalZfaFPImm(Imm, VT) >= 0)
1896 return true;
1897
1898 // Cannot create a 64 bit floating-point immediate value for rv32.
1899 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
1900 // td can handle +0.0 or -0.0 already.
1901 // -0.0 can be created by fmv + fneg.
1902 return Imm.isZero();
1903 }
1904 // Special case: the cost for -0.0 is 1.
1905 int Cost = Imm.isNegZero()
1906 ? 1
1907 : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
1908 Subtarget.getXLen(),
1909 Subtarget.getFeatureBits());
1910 // If the constantpool data is already in cache, only Cost 1 is cheaper.
1911 return Cost < FPImmCost;
1912}
1913
1914// TODO: This is very conservative.
1915bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1916 unsigned Index) const {
1918 return false;
1919
1920 // Only support extracting a fixed from a fixed vector for now.
1921 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
1922 return false;
1923
1924 unsigned ResElts = ResVT.getVectorNumElements();
1925 unsigned SrcElts = SrcVT.getVectorNumElements();
1926
1927 // Conservatively only handle extracting half of a vector.
1928 // TODO: Relax this.
1929 if ((ResElts * 2) != SrcElts)
1930 return false;
1931
1932 // The smallest type we can slide is i8.
1933 // TODO: We can extract index 0 from a mask vector without a slide.
1934 if (ResVT.getVectorElementType() == MVT::i1)
1935 return false;
1936
1937 // Slide can support arbitrary index, but we only treat vslidedown.vi as
1938 // cheap.
1939 if (Index >= 32)
1940 return false;
1941
1942 // TODO: We can do arbitrary slidedowns, but for now only support extracting
1943 // the upper half of a vector until we have more test coverage.
1944 return Index == 0 || Index == ResElts;
1945}
1946
1947MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1948 CallingConv::ID CC,
1949 EVT VT) const {
1950 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1951 // We might still end up using a GPR but that will be decided based on ABI.
1952 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
1953 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1954 return MVT::f32;
1955
1956 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1957}
1958
1959unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1960 CallingConv::ID CC,
1961 EVT VT) const {
1962 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1963 // We might still end up using a GPR but that will be decided based on ABI.
1964 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
1965 !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1966 return 1;
1967
1968 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1969}
1970
1971// Changes the condition code and swaps operands if necessary, so the SetCC
1972// operation matches one of the comparisons supported directly by branches
1973// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1974// with 1/-1.
1975static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1976 ISD::CondCode &CC, SelectionDAG &DAG) {
1977 // If this is a single bit test that can't be handled by ANDI, shift the
1978 // bit to be tested to the MSB and perform a signed compare with 0.
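  // For example, with XLEN=64 the test ((x & 0x8000) != 0) uses a mask that
  // does not fit ANDI, so it is rewritten as a signed ((x << 48) < 0): the
  // tested bit is shifted into the sign position and the equality check
  // becomes a signed comparison against zero.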
1979 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1980 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1981 isa<ConstantSDNode>(LHS.getOperand(1))) {
1982 uint64_t Mask = LHS.getConstantOperandVal(1);
1983 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1984 unsigned ShAmt = 0;
1985 if (isPowerOf2_64(Mask)) {
1986 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1987 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1988 } else {
1989 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1990 }
1991
1992 LHS = LHS.getOperand(0);
1993 if (ShAmt != 0)
1994 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1995 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1996 return;
1997 }
1998 }
1999
2000 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2001 int64_t C = RHSC->getSExtValue();
2002 switch (CC) {
2003 default: break;
2004 case ISD::SETGT:
2005 // Convert X > -1 to X >= 0.
2006 if (C == -1) {
2007 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2008 CC = ISD::SETGE;
2009 return;
2010 }
2011 break;
2012 case ISD::SETLT:
2013 // Convert X < 1 to 0 <= X.
2014 if (C == 1) {
2015 RHS = LHS;
2016 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2017 CC = ISD::SETGE;
2018 return;
2019 }
2020 break;
2021 }
2022 }
2023
2024 switch (CC) {
2025 default:
2026 break;
2027 case ISD::SETGT:
2028 case ISD::SETLE:
2029 case ISD::SETUGT:
2030 case ISD::SETULE:
2032 std::swap(LHS, RHS);
2033 break;
2034 }
2035}
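// A sketch of the rewrites above, assuming XLEN=64:
//   (X & 0xFFFF) == 0   -->  (X << 48) == 0   ; mask doesn't fit ANDI's simm12
//   (X & 0x8000) != 0   -->  (X << 48) < 0    ; single-bit test becomes a
//                                             ; sign-bit compare against zero
//   X > -1              -->  X >= 0
//   X > Y  (SETGT)      -->  Y < X  (SETLT)   ; swapped so it matches a branch
//                                             ; comparison the ISA supports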
2036
2038 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2039 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2040 if (VT.getVectorElementType() == MVT::i1)
2041 KnownSize *= 8;
2042
2043 switch (KnownSize) {
2044 default:
2045 llvm_unreachable("Invalid LMUL.");
2046 case 8:
2048 case 16:
2050 case 32:
2052 case 64:
2054 case 128:
2056 case 256:
2058 case 512:
2060 }
2061}
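// The mapping above is from the minimum known size in bits to an LMUL, with
// i1 vectors first scaled by 8 so masks are grouped with their SEW=8
// equivalents. Roughly: 8 -> MF8, 16 -> MF4, 32 -> MF2, 64 -> M1, 128 -> M2,
// 256 -> M4 and 512 -> M8; e.g. nxv2i32 (64 bits minimum) is LMUL=1 and
// nxv8i32 (256 bits minimum) is LMUL=4.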
2062
2064 switch (LMul) {
2065 default:
2066 llvm_unreachable("Invalid LMUL.");
2071 return RISCV::VRRegClassID;
2073 return RISCV::VRM2RegClassID;
2075 return RISCV::VRM4RegClassID;
2077 return RISCV::VRM8RegClassID;
2078 }
2079}
2080
2082 RISCVII::VLMUL LMUL = getLMUL(VT);
2083 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2084 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2085 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2086 LMUL == RISCVII::VLMUL::LMUL_1) {
2087 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2088 "Unexpected subreg numbering");
2089 return RISCV::sub_vrm1_0 + Index;
2090 }
2091 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2092 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2093 "Unexpected subreg numbering");
2094 return RISCV::sub_vrm2_0 + Index;
2095 }
2096 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2097 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2098 "Unexpected subreg numbering");
2099 return RISCV::sub_vrm4_0 + Index;
2100 }
2101 llvm_unreachable("Invalid vector type.");
2102}
2103
2105 if (VT.getVectorElementType() == MVT::i1)
2106 return RISCV::VRRegClassID;
2107 return getRegClassIDForLMUL(getLMUL(VT));
2108}
2109
2110// Attempt to decompose a subvector insert/extract between VecVT and
2111// SubVecVT via subregister indices. Returns the subregister index that
2112// can perform the subvector insert/extract with the given element index, as
2113// well as the index corresponding to any leftover subvectors that must be
2114// further inserted/extracted within the register class for SubVecVT.
2115std::pair<unsigned, unsigned>
2117 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2118 const RISCVRegisterInfo *TRI) {
2119 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2120 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2121 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2122 "Register classes not ordered");
2123 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2124 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2125 // Try to compose a subregister index that takes us from the incoming
2126 // LMUL>1 register class down to the outgoing one. At each step we half
2127 // the LMUL:
2128 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2129 // Note that this is not guaranteed to find a subregister index, such as
2130 // when we are extracting from one VR type to another.
2131 unsigned SubRegIdx = RISCV::NoSubRegister;
2132 for (const unsigned RCID :
2133 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2134 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2135 VecVT = VecVT.getHalfNumVectorElementsVT();
2136 bool IsHi =
2137 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2138 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2139 getSubregIndexByMVT(VecVT, IsHi));
2140 if (IsHi)
2141 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2142 }
2143 return {SubRegIdx, InsertExtractIdx};
2144}
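// Example of the decomposition above: extracting an nxv4i32 (LMUL=2) subvector
// from nxv8i32 (LMUL=4) at element index 4 halves the vector type once, lands
// in the high half and yields {sub_vrm2_1, 0}, i.e. a plain subregister
// extract with no leftover element index.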
2145
2146// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2147// stores for those types.
2148bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2149 return !Subtarget.useRVVForFixedLengthVectors() ||
2150 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2151}
2152
2154 if (!ScalarTy.isSimple())
2155 return false;
2156 switch (ScalarTy.getSimpleVT().SimpleTy) {
2157 case MVT::iPTR:
2158 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2159 case MVT::i8:
2160 case MVT::i16:
2161 case MVT::i32:
2162 return true;
2163 case MVT::i64:
2164 return Subtarget.hasVInstructionsI64();
2165 case MVT::f16:
2166 return Subtarget.hasVInstructionsF16();
2167 case MVT::f32:
2168 return Subtarget.hasVInstructionsF32();
2169 case MVT::f64:
2170 return Subtarget.hasVInstructionsF64();
2171 default:
2172 return false;
2173 }
2174}
2175
2176
2177unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2178 return NumRepeatedDivisors;
2179}
2180
2182 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2183 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2184 "Unexpected opcode");
2185 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2186 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2188 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2189 if (!II)
2190 return SDValue();
2191 return Op.getOperand(II->VLOperand + 1 + HasChain);
2192}
2193
2195 const RISCVSubtarget &Subtarget) {
2196 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2197 if (!Subtarget.useRVVForFixedLengthVectors())
2198 return false;
2199
2200 // We only support a set of vector types with a consistent maximum fixed size
2201 // across all supported vector element types to avoid legalization issues.
2202 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2203 // fixed-length vector type we support is 1024 bytes.
2204 if (VT.getFixedSizeInBits() > 1024 * 8)
2205 return false;
2206
2207 unsigned MinVLen = Subtarget.getRealMinVLen();
2208
2209 MVT EltVT = VT.getVectorElementType();
2210
2211 // Don't use RVV for vectors we cannot scalarize if required.
2212 switch (EltVT.SimpleTy) {
2213 // i1 is supported but has different rules.
2214 default:
2215 return false;
2216 case MVT::i1:
2217 // Masks can only use a single register.
2218 if (VT.getVectorNumElements() > MinVLen)
2219 return false;
2220 MinVLen /= 8;
2221 break;
2222 case MVT::i8:
2223 case MVT::i16:
2224 case MVT::i32:
2225 break;
2226 case MVT::i64:
2227 if (!Subtarget.hasVInstructionsI64())
2228 return false;
2229 break;
2230 case MVT::f16:
2231 if (!Subtarget.hasVInstructionsF16())
2232 return false;
2233 break;
2234 case MVT::f32:
2235 if (!Subtarget.hasVInstructionsF32())
2236 return false;
2237 break;
2238 case MVT::f64:
2239 if (!Subtarget.hasVInstructionsF64())
2240 return false;
2241 break;
2242 }
2243
2244 // Reject elements larger than ELEN.
2245 if (EltVT.getSizeInBits() > Subtarget.getELEN())
2246 return false;
2247
2248 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2249 // Don't use RVV for types that don't fit.
2250 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2251 return false;
2252
2253 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2254 // the base fixed length RVV support in place.
2255 if (!VT.isPow2VectorType())
2256 return false;
2257
2258 return true;
2259}
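// As a concrete reading of the checks above, with MinVLen = 128: v8i32 needs
// ceil(256/128) = 2 vector registers, which is within the default LMUL limit,
// so it is lowered with RVV; something like v1024i32 exceeds the 1024-byte
// size cap and is rejected.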
2260
2261bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2262 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2263}
2264
2265// Return the largest legal scalable vector type that matches VT's element type.
2267 const RISCVSubtarget &Subtarget) {
2268 // This may be called before legal types are setup.
2269 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2270 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2271 "Expected legal fixed length vector!");
2272
2273 unsigned MinVLen = Subtarget.getRealMinVLen();
2274 unsigned MaxELen = Subtarget.getELEN();
2275
2276 MVT EltVT = VT.getVectorElementType();
2277 switch (EltVT.SimpleTy) {
2278 default:
2279 llvm_unreachable("unexpected element type for RVV container");
2280 case MVT::i1:
2281 case MVT::i8:
2282 case MVT::i16:
2283 case MVT::i32:
2284 case MVT::i64:
2285 case MVT::f16:
2286 case MVT::f32:
2287 case MVT::f64: {
2288 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2289 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2290 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2291 unsigned NumElts =
2293 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2294 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2295 return MVT::getScalableVectorVT(EltVT, NumElts);
2296 }
2297 }
2298}
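// A sketch of the containers chosen above, assuming MinVLen = 128 and
// ELEN = 64 (RVVBitsPerBlock is 64): v8i16 maps to nxv4i16 (exactly LMUL=1),
// v2i32 maps to nxv1i32 (a fractional LMUL=1/2 container) and v32i8 maps to
// nxv16i8 (LMUL=2).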
2299
2301 const RISCVSubtarget &Subtarget) {
2303 Subtarget);
2304}
2305
2307 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2308}
2309
2310// Grow V to consume an entire RVV register.
2312 const RISCVSubtarget &Subtarget) {
2313 assert(VT.isScalableVector() &&
2314 "Expected to convert into a scalable vector!");
2315 assert(V.getValueType().isFixedLengthVector() &&
2316 "Expected a fixed length vector operand!");
2317 SDLoc DL(V);
2318 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2319 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2320}
2321
2322// Shrink V so it's just big enough to maintain a VT's worth of data.
2324 const RISCVSubtarget &Subtarget) {
2326 "Expected to convert into a fixed length vector!");
2327 assert(V.getValueType().isScalableVector() &&
2328 "Expected a scalable vector operand!");
2329 SDLoc DL(V);
2330 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2331 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2332}
2333
2334/// Return the type of the mask type suitable for masking the provided
2335/// vector type. This is simply an i1 element type vector of the same
2336/// (possibly scalable) length.
2337static MVT getMaskTypeFor(MVT VecVT) {
2338 assert(VecVT.isVector());
2340 return MVT::getVectorVT(MVT::i1, EC);
2341}
2342
2343/// Creates an all ones mask suitable for masking a vector of type VecTy with
2344 /// vector length VL.
2345static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2346 SelectionDAG &DAG) {
2347 MVT MaskVT = getMaskTypeFor(VecVT);
2348 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2349}
2350
2351static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
2352 const RISCVSubtarget &Subtarget) {
2353 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2354}
2355
2356static std::pair<SDValue, SDValue>
2357getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2358 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2359 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2360 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2361 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2362 return {Mask, VL};
2363}
2364
2365// Gets the two common "VL" operands: an all-ones mask and the vector length.
2366// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2367// the vector type that the fixed-length vector is contained in. Otherwise if
2368// VecVT is scalable, then ContainerVT should be the same as VecVT.
2369static std::pair<SDValue, SDValue>
2370getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2371 const RISCVSubtarget &Subtarget) {
2372 if (VecVT.isFixedLengthVector())
2373 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2374 Subtarget);
2375 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2376 MVT XLenVT = Subtarget.getXLenVT();
2377 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
2378 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2379 return {Mask, VL};
2380}
2381
2382// As above but assuming the given type is a scalable vector type.
2383static std::pair<SDValue, SDValue>
2385 const RISCVSubtarget &Subtarget) {
2386 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2387 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
2388}
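// In all of the getDefaultVLOps variants above the returned pair is
// {all-ones mask, VL}: for a fixed-length source VL is a constant equal to
// the element count, while for scalable types VL is the X0 register, which
// the vector pseudos interpret as VLMAX.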
2389
2391 SelectionDAG &DAG) const {
2392 assert(VecVT.isScalableVector() && "Expected scalable vector");
2393 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2394 VecVT.getVectorElementCount());
2395}
2396
2397// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2398// of either is (currently) supported. This can get us into an infinite loop
2399// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2400// as a ..., etc.
2401// Until either (or both) of these can reliably lower any node, reporting that
2402// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2403// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2404// which is not desirable.
2406 EVT VT, unsigned DefinedValues) const {
2407 return false;
2408}
2409
2411 const RISCVSubtarget &Subtarget) {
2412 // RISC-V FP-to-int conversions saturate to the destination register size, but
2413 // don't produce 0 for nan. We can use a conversion instruction and fix the
2414 // nan case with a compare and a select.
2415 SDValue Src = Op.getOperand(0);
2416
2417 MVT DstVT = Op.getSimpleValueType();
2418 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2419
2420 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2421
2422 if (!DstVT.isVector()) {
2423 // In the absence of Zfh, promote f16 to f32, then saturate the result.
2424 if (Src.getSimpleValueType() == MVT::f16 &&
2425 !Subtarget.hasStdExtZfhOrZhinx()) {
2426 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2427 }
2428
2429 unsigned Opc;
2430 if (SatVT == DstVT)
2431 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2432 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2434 else
2435 return SDValue();
2436 // FIXME: Support other SatVTs by clamping before or after the conversion.
2437
2438 SDLoc DL(Op);
2439 SDValue FpToInt = DAG.getNode(
2440 Opc, DL, DstVT, Src,
2442
2443 if (Opc == RISCVISD::FCVT_WU_RV64)
2444 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2445
2446 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2447 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2449 }
2450
2451 // Vectors.
2452
2453 MVT DstEltVT = DstVT.getVectorElementType();
2454 MVT SrcVT = Src.getSimpleValueType();
2455 MVT SrcEltVT = SrcVT.getVectorElementType();
2456 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2457 unsigned DstEltSize = DstEltVT.getSizeInBits();
2458
2459 // Only handle saturating to the destination type.
2460 if (SatVT != DstEltVT)
2461 return SDValue();
2462
2463 // FIXME: Don't support narrowing by more than 1 step for now.
2464 if (SrcEltSize > (2 * DstEltSize))
2465 return SDValue();
2466
2467 MVT DstContainerVT = DstVT;
2468 MVT SrcContainerVT = SrcVT;
2469 if (DstVT.isFixedLengthVector()) {
2470 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2471 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2472 assert(DstContainerVT.getVectorElementCount() ==
2473 SrcContainerVT.getVectorElementCount() &&
2474 "Expected same element count");
2475 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2476 }
2477
2478 SDLoc DL(Op);
2479
2480 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2481
2482 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2483 {Src, Src, DAG.getCondCode(ISD::SETNE),
2484 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2485
2486 // If we need to widen by more than 1 step, promote the FP type, then do a
2487 // widening convert.
2488 if (DstEltSize > (2 * SrcEltSize)) {
2489 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2490 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2491 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2492 }
2493
2494 unsigned RVVOpc =
2496 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2497
2498 SDValue SplatZero = DAG.getNode(
2499 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2500 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2501 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2502 Res, VL);
2503
2504 if (DstVT.isFixedLengthVector())
2505 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2506
2507 return Res;
2508}
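// Scalar-path sketch of the lowering above: a saturating f32 -> i64 convert on
// RV64 becomes an FCVT_X node using the RTZ rounding mode (the hardware
// conversion already clamps out-of-range inputs), followed by a compare of the
// source against itself and a select that substitutes 0 when the source is
// NaN.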
2509
2511 switch (Opc) {
2512 case ISD::FROUNDEVEN:
2514 case ISD::VP_FROUNDEVEN:
2515 return RISCVFPRndMode::RNE;
2516 case ISD::FTRUNC:
2517 case ISD::STRICT_FTRUNC:
2518 case ISD::VP_FROUNDTOZERO:
2519 return RISCVFPRndMode::RTZ;
2520 case ISD::FFLOOR:
2521 case ISD::STRICT_FFLOOR:
2522 case ISD::VP_FFLOOR:
2523 return RISCVFPRndMode::RDN;
2524 case ISD::FCEIL:
2525 case ISD::STRICT_FCEIL:
2526 case ISD::VP_FCEIL:
2527 return RISCVFPRndMode::RUP;
2528 case ISD::FROUND:
2529 case ISD::STRICT_FROUND:
2530 case ISD::VP_FROUND:
2531 return RISCVFPRndMode::RMM;
2532 case ISD::FRINT:
2533 return RISCVFPRndMode::DYN;
2534 }
2535
2536 return RISCVFPRndMode::Invalid;
2537}
2538
2539// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2540// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2541 // the integer domain and back, taking care to avoid converting values that
2542 // are NaN or already correct.
2543static SDValue
2545 const RISCVSubtarget &Subtarget) {
2546 MVT VT = Op.getSimpleValueType();
2547 assert(VT.isVector() && "Unexpected type");
2548
2549 SDLoc DL(Op);
2550
2551 SDValue Src = Op.getOperand(0);
2552
2553 MVT ContainerVT = VT;
2554 if (VT.isFixedLengthVector()) {
2555 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2556 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2557 }
2558
2559 SDValue Mask, VL;
2560 if (Op->isVPOpcode()) {
2561 Mask = Op.getOperand(1);
2562 if (VT.isFixedLengthVector())
2563 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2564 Subtarget);
2565 VL = Op.getOperand(2);
2566 } else {
2567 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2568 }
2569
2570 // Freeze the source since we are increasing the number of uses.
2571 Src = DAG.getFreeze(Src);
2572
2573 // We do the conversion on the absolute value and fix the sign at the end.
2574 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2575
2576 // Determine the largest integer that can be represented exactly. This and
2577 // values larger than it don't have any fractional bits so don't need to
2578 // be converted.
2579 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2580 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2581 APFloat MaxVal = APFloat(FltSem);
2582 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2583 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2584 SDValue MaxValNode =
2585 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2586 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2587 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2588
2589 // If abs(Src) was larger than MaxVal or nan, keep it.
2590 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2591 Mask =
2592 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2593 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2594 Mask, Mask, VL});
2595
2596 // Truncate to integer and convert back to FP.
2597 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2598 MVT XLenVT = Subtarget.getXLenVT();
2599 SDValue Truncated;
2600
2601 switch (Op.getOpcode()) {
2602 default:
2603 llvm_unreachable("Unexpected opcode");
2604 case ISD::FCEIL:
2605 case ISD::VP_FCEIL:
2606 case ISD::FFLOOR:
2607 case ISD::VP_FFLOOR:
2608 case ISD::FROUND:
2609 case ISD::FROUNDEVEN:
2610 case ISD::VP_FROUND:
2611 case ISD::VP_FROUNDEVEN:
2612 case ISD::VP_FROUNDTOZERO: {
2615 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2616 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2617 break;
2618 }
2619 case ISD::FTRUNC:
2620 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2621 Mask, VL);
2622 break;
2623 case ISD::FRINT:
2624 case ISD::VP_FRINT:
2625 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2626 break;
2627 case ISD::FNEARBYINT:
2628 case ISD::VP_FNEARBYINT:
2629 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2630 Mask, VL);
2631 break;
2632 }
2633
2634 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2635 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2636 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2637 Mask, VL);
2638
2639 // Restore the original sign so that -0.0 is preserved.
2640 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2641 Src, Src, Mask, VL);
2642
2643 if (!VT.isFixedLengthVector())
2644 return Truncated;
2645
2646 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2647}
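// Worked example for the routine above, lowering vector FFLOOR on a lane
// holding -2.5 (f32): |-2.5| is below 2^23, so the lane stays active in the
// mask; the masked convert with RDN produces -3, SINT_TO_FP_VL turns it back
// into -3.0, and FCOPYSIGN_VL re-applies the original sign. Lanes that are NaN
// or already at least 2^23 in magnitude fail the SETOLT check and keep their
// original value.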
2648
2649// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
2650 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source
2651 // to qNaNs and converting the new source to integer and back to FP.
2652static SDValue
2654 const RISCVSubtarget &Subtarget) {
2655 SDLoc DL(Op);
2656 MVT VT = Op.getSimpleValueType();
2657 SDValue Chain = Op.getOperand(0);
2658 SDValue Src = Op.getOperand(1);
2659
2660 MVT ContainerVT = VT;
2661 if (VT.isFixedLengthVector()) {
2662 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2663 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2664 }
2665
2666 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2667
2668 // Freeze the source since we are increasing the number of uses.
2669 Src = DAG.getFreeze(Src);
2670
2671 // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
2672 MVT MaskVT = Mask.getSimpleValueType();
2674 DAG.getVTList(MaskVT, MVT::Other),
2675 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2676 DAG.getUNDEF(MaskVT), Mask, VL});
2677 Chain = Unorder.getValue(1);
2679 DAG.getVTList(ContainerVT, MVT::Other),
2680 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2681 Chain = Src.getValue(1);
2682
2683 // We do the conversion on the absolute value and fix the sign at the end.
2684 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2685
2686 // Determine the largest integer that can be represented exactly. This and
2687 // values larger than it don't have any fractional bits so don't need to
2688 // be converted.
2689 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2690 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2691 APFloat MaxVal = APFloat(FltSem);
2692 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2693 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2694 SDValue MaxValNode =
2695 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2696 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2697 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2698
2699 // If abs(Src) was larger than MaxVal or nan, keep it.
2700 Mask = DAG.getNode(
2701 RISCVISD::SETCC_VL, DL, MaskVT,
2702 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
2703
2704 // Truncate to integer and convert back to FP.
2705 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2706 MVT XLenVT = Subtarget.getXLenVT();
2707 SDValue Truncated;
2708
2709 switch (Op.getOpcode()) {
2710 default:
2711 llvm_unreachable("Unexpected opcode");
2712 case ISD::STRICT_FCEIL:
2713 case ISD::STRICT_FFLOOR:
2714 case ISD::STRICT_FROUND:
2718 Truncated = DAG.getNode(
2719 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
2720 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
2721 break;
2722 }
2723 case ISD::STRICT_FTRUNC:
2724 Truncated =
2726 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
2727 break;
2730 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
2731 Mask, VL);
2732 break;
2733 }
2734 Chain = Truncated.getValue(1);
2735
2736 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2737 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
2738 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
2739 DAG.getVTList(ContainerVT, MVT::Other), Chain,
2740 Truncated, Mask, VL);
2741 Chain = Truncated.getValue(1);
2742 }
2743
2744 // Restore the original sign so that -0.0 is preserved.
2745 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2746 Src, Src, Mask, VL);
2747
2748 if (VT.isFixedLengthVector())
2749 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2750 return DAG.getMergeValues({Truncated, Chain}, DL);
2751}
2752
2753static SDValue
2755 const RISCVSubtarget &Subtarget) {
2756 MVT VT = Op.getSimpleValueType();
2757 if (VT.isVector())
2758 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
2759
2760 if (DAG.shouldOptForSize())
2761 return SDValue();
2762
2763 SDLoc DL(Op);
2764 SDValue Src = Op.getOperand(0);
2765
2766 // Create an integer the size of the mantissa with the MSB set. This and all
2767 // values larger than it don't have any fractional bits so don't need to be
2768 // converted.
2769 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
2770 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2771 APFloat MaxVal = APFloat(FltSem);
2772 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2773 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2774 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
2775
2777 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
2778 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
2779}
2780
2781static SDValue
2783 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
2784 SDValue Offset, SDValue Mask, SDValue VL,
2786 if (Merge.isUndef())
2788 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2789 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2790 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
2791}
2792
2793static SDValue
2794getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
2796 SDValue VL,
2798 if (Merge.isUndef())
2800 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2801 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2802 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
2803}
2804
2808 int64_t Addend;
2809};
2810
2811static std::optional<uint64_t> getExactInteger(const APFloat &APF,
2813 APSInt ValInt(BitWidth, !APF.isNegative());
2814 // We use an arbitrary rounding mode here. If a floating-point value is an exact
2815 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
2816 // the rounding mode changes the output value, then it is not an exact
2817 // integer.
2819 bool IsExact;
2820 // If it is out of signed integer range, it will return an invalid operation.
2821 // If it is not an exact integer, IsExact is false.
2822 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
2824 !IsExact)
2825 return std::nullopt;
2826 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
2827}
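// A few examples of getExactInteger above: 3.0 with BitWidth 8 yields 3,
// -3.0 yields 0xFD (its two's-complement bit pattern) and 0.5 yields
// std::nullopt because it is not an exact integer.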
2828
2829// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
2830// to the (non-zero) step S and start value X. This can be then lowered as the
2831// RVV sequence (VID * S) + X, for example.
2832// The step S is represented as an integer numerator divided by a positive
2833// denominator. Note that the implementation currently only identifies
2834// sequences in which either the numerator is +/- 1 or the denominator is 1. It
2835// cannot detect 2/3, for example.
2836// Note that this method will also match potentially unappealing index
2837// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
2838// determine whether this is worth generating code for.
2839static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2840 unsigned NumElts = Op.getNumOperands();
2841 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2842 bool IsInteger = Op.getValueType().isInteger();
2843
2844 std::optional<unsigned> SeqStepDenom;
2845 std::optional<int64_t> SeqStepNum, SeqAddend;
2846 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
2847 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2848 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2849 // Assume undef elements match the sequence; we just have to be careful
2850 // when interpolating across them.
2851 if (Op.getOperand(Idx).isUndef())
2852 continue;
2853
2854 uint64_t Val;
2855 if (IsInteger) {
2856 // The BUILD_VECTOR must be all constants.
2857 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
2858 return std::nullopt;
2859 Val = Op.getConstantOperandVal(Idx) &
2860 maskTrailingOnes<uint64_t>(EltSizeInBits);
2861 } else {
2862 // The BUILD_VECTOR must be all constants.
2863 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
2864 return std::nullopt;
2865 if (auto ExactInteger = getExactInteger(
2866 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2867 EltSizeInBits))
2868 Val = *ExactInteger;
2869 else
2870 return std::nullopt;
2871 }
2872
2873 if (PrevElt) {
2874 // Calculate the step since the last non-undef element, and ensure
2875 // it's consistent across the entire sequence.
2876 unsigned IdxDiff = Idx - PrevElt->second;
2877 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
2878
2879 // A value difference of zero means that we're somewhere in the middle
2880 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
2881 // step change before evaluating the sequence.
2882 if (ValDiff == 0)
2883 continue;
2884
2885 int64_t Remainder = ValDiff % IdxDiff;
2886 // Normalize the step if it's greater than 1.
2887 if (Remainder != ValDiff) {
2888 // The difference must cleanly divide the element span.
2889 if (Remainder != 0)
2890 return std::nullopt;
2891 ValDiff /= IdxDiff;
2892 IdxDiff = 1;
2893 }
2894
2895 if (!SeqStepNum)
2896 SeqStepNum = ValDiff;
2897 else if (ValDiff != SeqStepNum)
2898 return std::nullopt;
2899
2900 if (!SeqStepDenom)
2901 SeqStepDenom = IdxDiff;
2902 else if (IdxDiff != *SeqStepDenom)
2903 return std::nullopt;
2904 }
2905
2906 // Record this non-undef element for later.
2907 if (!PrevElt || PrevElt->first != Val)
2908 PrevElt = std::make_pair(Val, Idx);
2909 }
2910
2911 // We need to have logged a step for this to count as a legal index sequence.
2912 if (!SeqStepNum || !SeqStepDenom)
2913 return std::nullopt;
2914
2915 // Loop back through the sequence and validate elements we might have skipped
2916 // while waiting for a valid step. While doing this, log any sequence addend.
2917 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2918 if (Op.getOperand(Idx).isUndef())
2919 continue;
2920 uint64_t Val;
2921 if (IsInteger) {
2922 Val = Op.getConstantOperandVal(Idx) &
2923 maskTrailingOnes<uint64_t>(EltSizeInBits);
2924 } else {
2925 Val = *getExactInteger(
2926 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2927 EltSizeInBits);
2928 }
2929 uint64_t ExpectedVal =
2930 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
2931 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
2932 if (!SeqAddend)
2933 SeqAddend = Addend;
2934 else if (Addend != SeqAddend)
2935 return std::nullopt;
2936 }
2937
2938 assert(SeqAddend && "Must have an addend if we have a step");
2939
2940 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
2941}
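// Examples for the matcher above:
//   <1, 3, 5, 7>       -> StepNumerator 2, StepDenominator 1, Addend 1
//   <0, 0, 1, 1, 2, 2> -> StepNumerator 1, StepDenominator 2, Addend 0
//   <0, 50939494>      -> also matches (step 50939494); the caller decides
//                         whether materializing such a step is worthwhile.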
2942
2943// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
2944// and lower it as a VRGATHER_VX_VL from the source vector.
2945static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
2946 SelectionDAG &DAG,
2947 const RISCVSubtarget &Subtarget) {
2948 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2949 return SDValue();
2950 SDValue Vec = SplatVal.getOperand(0);
2951 // Only perform this optimization on vectors of the same size for simplicity.
2952 // Don't perform this optimization for i1 vectors.
2953 // FIXME: Support i1 vectors, maybe by promoting to i8?
2954 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
2955 return SDValue();
2956 SDValue Idx = SplatVal.getOperand(1);
2957 // The index must be a legal type.
2958 if (Idx.getValueType() != Subtarget.getXLenVT())
2959 return SDValue();
2960
2961 MVT ContainerVT = VT;
2962 if (VT.isFixedLengthVector()) {
2963 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2964 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2965 }
2966
2967 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2968
2969 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
2970 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
2971
2972 if (!VT.isFixedLengthVector())
2973 return Gather;
2974
2975 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2976}
2977
2979 const RISCVSubtarget &Subtarget) {
2980 MVT VT = Op.getSimpleValueType();
2981 assert(VT.isFixedLengthVector() && "Unexpected vector!");
2982
2983 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2984
2985 SDLoc DL(Op);
2986 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2987
2988 MVT XLenVT = Subtarget.getXLenVT();
2989 unsigned NumElts = Op.getNumOperands();
2990
2991 if (VT.getVectorElementType() == MVT::i1) {
2992 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2993 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2994 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2995 }
2996
2997 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2998 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2999 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3000 }
3001
3002 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3003 // scalar integer chunks whose bit-width depends on the number of mask
3004 // bits and XLEN.
3005 // First, determine the most appropriate scalar integer type to use. This
3006 // is at most XLenVT, but may be shrunk to a smaller vector element type
3007 // according to the size of the final vector - use i8 chunks rather than
3008 // XLenVT if we're producing a v8i1. This results in more consistent
3009 // codegen across RV32 and RV64.
3010 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3011 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
3012 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3013 // If we have to use more than one INSERT_VECTOR_ELT then this
3014 // optimization is likely to increase code size; avoid performing it in
3015 // such a case. We can use a load from a constant pool in this case.
3016 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3017 return SDValue();
3018 // Now we can create our integer vector type. Note that it may be larger
3019 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3020 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3021 MVT IntegerViaVecVT =
3022 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3023 IntegerViaVecElts);
3024
3025 uint64_t Bits = 0;
3026 unsigned BitPos = 0, IntegerEltIdx = 0;
3027 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3028
3029 for (unsigned I = 0; I < NumElts;) {
3030 SDValue V = Op.getOperand(I);
3031 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3032 Bits |= ((uint64_t)BitValue << BitPos);
3033 ++BitPos;
3034 ++I;
3035
3036 // Once we accumulate enough bits to fill our scalar type or process the
3037 // last element, insert into our vector and clear our accumulated data.
3038 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3039 if (NumViaIntegerBits <= 32)
3040 Bits = SignExtend64<32>(Bits);
3041 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3042 Elts[IntegerEltIdx] = Elt;
3043 Bits = 0;
3044 BitPos = 0;
3045 IntegerEltIdx++;
3046 }
3047 }
3048
3049 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3050
3051 if (NumElts < NumViaIntegerBits) {
3052 // If we're producing a smaller vector than our minimum legal integer
3053 // type, bitcast to the equivalent (known-legal) mask type, and extract
3054 // our final mask.
3055 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3056 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3057 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3058 DAG.getConstant(0, DL, XLenVT));
3059 } else {
3060 // Else we must have produced an integer type with the same size as the
3061 // mask type; bitcast for the final result.
3062 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3063 Vec = DAG.getBitcast(VT, Vec);
3064 }
3065
3066 return Vec;
3067 }
3068
3069 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3070 // vector type, we have a legal equivalently-sized i8 type, so we can use
3071 // that.
3072 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3073 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3074
3075 SDValue WideVec;
3077 // For a splat, perform a scalar truncate before creating the wider
3078 // vector.
3079 assert(Splat.getValueType() == XLenVT &&
3080 "Unexpected type for i1 splat value");
3081 Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
3082 DAG.getConstant(1, DL, XLenVT));
3083 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3084 } else {
3085 SmallVector<SDValue, 8> Ops(Op->op_values());
3086 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3087 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3088 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3089 }
3090
3091 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3092 }
3093
3095 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3096 return Gather;
3097 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3099 Splat =
3100 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3101 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3102 }
3103
3104 // Try and match index sequences, which we can lower to the vid instruction
3105 // with optional modifications. An all-undef vector is matched by
3106 // getSplatValue, above.
3107 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3108 int64_t StepNumerator = SimpleVID->StepNumerator;
3109 unsigned StepDenominator = SimpleVID->StepDenominator;
3110 int64_t Addend = SimpleVID->Addend;
3111
3112 assert(StepNumerator != 0 && "Invalid step");
3113 bool Negate = false;
3114 int64_t SplatStepVal = StepNumerator;
3115 unsigned StepOpcode = ISD::MUL;
3116 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3117 // anyway as the shift of 63 won't fit in uimm5.
3118 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3119 isPowerOf2_64(std::abs(StepNumerator))) {
3120 Negate = StepNumerator < 0;
3121 StepOpcode = ISD::SHL;
3122 SplatStepVal = Log2_64(std::abs(StepNumerator));
3123 }
3124
3125 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3126 // threshold since it's the immediate value many RVV instructions accept.
3127 // There is no vmul.vi instruction so ensure multiply constant can fit in
3128 // a single addi instruction.
3129 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3130 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3131 isPowerOf2_32(StepDenominator) &&
3132 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3133 MVT VIDVT =
3135 MVT VIDContainerVT =
3136 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3137 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3138 // Convert right out of the scalable type so we can use standard ISD
3139 // nodes for the rest of the computation. If we used scalable types with
3140 // these, we'd lose the fixed-length vector info and generate worse
3141 // vsetvli code.
3142 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3143 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3144 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3145 SDValue SplatStep = DAG.getSplatBuildVector(
3146 VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
3147 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3148 }
3149 if (StepDenominator != 1) {
3150 SDValue SplatStep = DAG.getSplatBuildVector(
3151 VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
3152 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3153 }
3154 if (Addend != 0 || Negate) {
3155 SDValue SplatAddend = DAG.getSplatBuildVector(
3156 VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
3157 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3158 VID);
3159 }
3160 if (VT.isFloatingPoint()) {
3161 // TODO: Use vfwcvt to reduce register pressure.
3162 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3163 }
3164 return VID;
3165 }
3166 }
3167
3168 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3169 // when re-interpreted as a vector with a larger element type. For example,
3170 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3171 // could be instead splat as
3172 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3173 // TODO: This optimization could also work on non-constant splats, but it
3174 // would require bit-manipulation instructions to construct the splat value.
3175 SmallVector<SDValue> Sequence;
3176 unsigned EltBitSize = VT.getScalarSizeInBits();
3177 const auto *BV = cast<BuildVectorSDNode>(Op);
3178 if (VT.isInteger() && EltBitSize < 64 &&
3180 BV->getRepeatedSequence(Sequence) &&
3181 (Sequence.size() * EltBitSize) <= 64) {
3182 unsigned SeqLen = Sequence.size();
3183 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3184 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
3185 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3186 ViaIntVT == MVT::i64) &&
3187 "Unexpected sequence type");
3188
3189 unsigned EltIdx = 0;
3190 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3191 uint64_t SplatValue = 0;
3192 // Construct the amalgamated value which can be splatted as this larger
3193 // vector type.
3194 for (const auto &SeqV : Sequence) {
3195 if (!SeqV.isUndef())
3196 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3197 << (EltIdx * EltBitSize));
3198 EltIdx++;
3199 }
3200
3201 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3202 // achieve better constant materialization.
3203 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3204 SplatValue = SignExtend64<32>(SplatValue);
3205
3206 // Since we can't introduce illegal i64 types at this stage, we can only
3207 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3208 // way we can use RVV instructions to splat.
3209 assert((ViaIntVT.bitsLE(XLenVT) ||
3210 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3211 "Unexpected bitcast sequence");
3212 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3213 SDValue ViaVL =
3214 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3215 MVT ViaContainerVT =
3216 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3217 SDValue Splat =
3218 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3219 DAG.getUNDEF(ViaContainerVT),
3220 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3221 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3222 return DAG.getBitcast(VT, Splat);
3223 }
3224 }
3225
3226 // Try and optimize BUILD_VECTORs with "dominant values" - these are values
3227 // which constitute a large proportion of the elements. In such cases we can
3228 // splat a vector with the dominant element and make up the shortfall with
3229 // INSERT_VECTOR_ELTs.
3230 // Note that this includes vectors of 2 elements by association. The
3231 // upper-most element is the "dominant" one, allowing us to use a splat to
3232 // "insert" the upper element, and an insert of the lower element at position
3233 // 0, which improves codegen.
3234 SDValue DominantValue;
3235 unsigned MostCommonCount = 0;
3236 DenseMap<SDValue, unsigned> ValueCounts;
3237 unsigned NumUndefElts =
3238 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3239
3240 // Track the number of scalar loads we know we'd be inserting, estimated as
3241 // any non-zero floating-point constant. Other kinds of element are either
3242 // already in registers or are materialized on demand. The threshold at which
3243 // a vector load is more desirable than several scalar materialization and
3244 // vector-insertion instructions is not known.
3245 unsigned NumScalarLoads = 0;
3246
3247 for (SDValue V : Op->op_values()) {
3248 if (V.isUndef())
3249 continue;
3250
3251 ValueCounts.insert(std::make_pair(V, 0));
3252 unsigned &Count = ValueCounts[V];
3253 if (0 == Count)
3254 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3255 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3256
3257 // Is this value dominant? In case of a tie, prefer the highest element as
3258 // it's cheaper to insert near the beginning of a vector than it is at the
3259 // end.
3260 if (++Count >= MostCommonCount) {
3261 DominantValue = V;
3262 MostCommonCount = Count;
3263 }
3264 }
3265
3266 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3267 unsigned NumDefElts = NumElts - NumUndefElts;
3268 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3269
3270 // Don't perform this optimization when optimizing for size, since
3271 // materializing elements and inserting them tends to cause code bloat.
3272 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3273 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3274 ((MostCommonCount > DominantValueCountThreshold) ||
3275 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3276 // Start by splatting the most common element.
3277 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3278
3279 DenseSet<SDValue> Processed{DominantValue};
3280 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3281 for (const auto &OpIdx : enumerate(Op->ops())) {
3282 const SDValue &V = OpIdx.value();
3283 if (V.isUndef() || !Processed.insert(V).second)
3284 continue;
3285 if (ValueCounts[V] == 1) {
3286 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3287 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3288 } else {
3289 // Blend in all instances of this value using a VSELECT, using a
3290 // mask where each bit signals whether that element is the one
3291 // we're after.
3293 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3294 return DAG.getConstant(V == V1, DL, XLenVT);
3295 });
3296 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3297 DAG.getBuildVector(SelMaskTy, DL, Ops),
3298 DAG.getSplatBuildVector(VT, DL, V), Vec);
3299 }
3300 }
3301
3302 return Vec;
3303 }
3304
3305 // For constant vectors, use generic constant pool lowering. Otherwise,
3306 // we'd have to materialize constants in GPRs just to move them into the
3307 // vector.
3308 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3309 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3310 return SDValue();
3311
3312 assert((!VT.isFloatingPoint() ||
3313 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3314 "Illegal type which will result in reserved encoding");
3315
3316 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3317
3318 SDValue Vec = DAG.getUNDEF(ContainerVT);
3319 unsigned UndefCount = 0;
3320 for (const SDValue &V : Op->ops()) {
3321 if (V.isUndef()) {
3322 UndefCount++;
3323 continue;
3324 }
3325 if (UndefCount) {
3326 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3327 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3328 Vec, Offset, Mask, VL, Policy);
3329 UndefCount = 0;
3330 }
3331 auto OpCode =
3333 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3334 V, Mask, VL);
3335 }
3336 if (UndefCount) {
3337 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3338 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3339 Vec, Offset, Mask, VL, Policy);
3340 }
3341 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3342}
3343
3344static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3346 SelectionDAG &DAG) {
3347 if (!Passthru)
3348 Passthru = DAG.getUNDEF(VT);
3350 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3351 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3352 // If Hi constant is all the same sign bit as Lo, lower this as a custom
3353 // node in order to try and match RVV vector/scalar instructions.
3354 if ((LoC >> 31) == HiC)
3355 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3356
3357 // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
3358 // vmv.v.x whose EEW = 32 to lower it.
3359 if (LoC == HiC && isAllOnesConstant(VL)) {
3360 MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3361 // TODO: if vl <= min(VLMAX), we can also do this. But we could not
3362 // access the subtarget here now.
3363 auto InterVec = DAG.getNode(
3364 RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
3365 DAG.getRegister(RISCV::X0, MVT::i32));
3366 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3367 }
3368 }
3369
3370 // Fall back to a stack store and stride x0 vector load.
3371 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3372 Hi, VL);
3373}
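// For instance, splatting the i64 constant -1 on RV32 hits the first case
// above: Lo and Hi are both -1, so (LoC >> 31) == HiC and a single VMV_V_X_VL
// of -1 suffices. A value such as 0x0000000100000002 has no such structure and
// falls back to SPLAT_VECTOR_SPLIT_I64_VL (a stack store plus strided load).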
3374
3375// Called by type legalization to handle splat of i64 on RV32.
3376// FIXME: We can optimize this when the type has sign or zero bits in one
3377// of the halves.
3378static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3379 SDValue Scalar, SDValue VL,
3380 SelectionDAG &DAG) {
3381 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3382 SDValue Lo, Hi;
3383 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3384 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3385}
3386
3387// This function lowers a splat of a scalar operand Splat with the vector
3388// length VL. It ensures the final sequence is type legal, which is useful when
3389// lowering a splat after type legalization.
3390static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3391 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3392 const RISCVSubtarget &Subtarget) {
3393 bool HasPassthru = Passthru && !Passthru.isUndef();
3394 if (!HasPassthru && !Passthru)
3395 Passthru = DAG.getUNDEF(VT);
3396 if (VT.isFloatingPoint()) {
3397 // If VL is 1, we could use vfmv.s.f.
3398 if (isOneConstant(VL))
3399 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
3400 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3401 }
3402
3403 MVT XLenVT = Subtarget.getXLenVT();
3404
3405 // Simplest case is that the operand needs to be promoted to XLenVT.
3406 if (Scalar.getValueType().bitsLE(XLenVT)) {
3407 // If the operand is a constant, sign extend to increase our chances
3408 // of being able to use a .vi instruction. ANY_EXTEND would become a
3409 // zero extend and the simm5 check in isel would fail.
3410 // FIXME: Should we ignore the upper bits in isel instead?
3411 unsigned ExtOpc =
3413 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3415 // If VL is 1 and the scalar value won't benefit from immediate, we could
3416 // use vmv.s.x.
3417 if (isOneConstant(VL) &&
3418 (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
3419 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
3420 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3421 }
3422
3423 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
3424 "Unexpected scalar for splat lowering!");
3425
3426 if (isOneConstant(VL) && isNullConstant(Scalar))
3427 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
3428 DAG.getConstant(0, DL, XLenVT), VL);
3429
3430 // Otherwise use the more complicated splatting algorithm.
3431 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3432}
3433
3434static MVT getLMUL1VT(MVT VT) {
3436 "Unexpected vector MVT");
3440}
3441
3442// This function lowers an insert of a scalar operand Scalar into lane
3443// 0 of the vector regardless of the value of VL. The contents of the
3444// remaining lanes of the result vector are unspecified. VL is assumed
3445// to be non-zero.
3447 const SDLoc &DL, SelectionDAG &DAG,
3448 const RISCVSubtarget &Subtarget) {
3449 const MVT XLenVT = Subtarget.getXLenVT();
3450
3451 SDValue Passthru = DAG.getUNDEF(VT);
3452 if (VT.isFloatingPoint()) {
3453 // TODO: Use vmv.v.i for appropriate constants
3454 // Use M1 or smaller to avoid over constraining register allocation
3455 const MVT M1VT = getLMUL1VT(VT);
3456 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3457 SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
3458 DAG.getUNDEF(InnerVT), Scalar, VL);
3459 if (VT != InnerVT)
3460 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3461 DAG.getUNDEF(VT),
3462 Result, DAG.getConstant(0, DL, XLenVT));
3463 return Result;
3464 }
3465
3466
3467 // Avoid the tricky legalization cases by falling back to using the
3468 // splat code which already handles it gracefully.
3469 if (!Scalar.getValueType().bitsLE(XLenVT))
3470 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
3471 DAG.getConstant(1, DL, XLenVT),
3472 VT, DL, DAG, Subtarget);
3473
3474 // If the operand is a constant, sign extend to increase our chances
3475 // of being able to use a .vi instruction. ANY_EXTEND would become a
3476 // zero extend and the simm5 check in isel would fail.
3477 // FIXME: Should we ignore the upper bits in isel instead?
3478 unsigned ExtOpc =
3480 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3481 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
3482 // higher would involve overly constraining the register allocator for
3483 // no purpose.
3484 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
3485 if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
3486 VT.bitsLE(getLMUL1VT(VT)))
3487 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3488 }
3489 // Use M1 or smaller to avoid over constraining register allocation
3490 const MVT M1VT = getLMUL1VT(VT);
3491 auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3492 SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
3493 DAG.getUNDEF(InnerVT), Scalar, VL);
3494 if (VT != InnerVT)
3495 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3496 DAG.getUNDEF(VT),
3497 Result, DAG.getConstant(0, DL, XLenVT));
3498 return Result;
3499}
3500
3501 // Is this a shuffle that extracts either the even or odd elements of a vector?
3502// That is, specifically, either (a) or (b) below.
3503// t34: v8i8 = extract_subvector t11, Constant:i64<0>
3504// t33: v8i8 = extract_subvector t11, Constant:i64<8>
3505// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3506// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3507 // Returns {Src Vector, Even Elements} on success.
3508static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3509 SDValue V2, ArrayRef<int> Mask,
3510 const RISCVSubtarget &Subtarget) {
3511 // Need to be able to widen the vector.
3512 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3513 return false;
3514
3515 // Both inputs must be extracts.
3516 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
3517 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3518 return false;
3519
3520 // Extracting from the same source.
3521 SDValue Src = V1.getOperand(0);
3522 if (Src != V2.getOperand(0))
3523 return false;
3524
3525 // Src needs to have twice the number of elements.
3526 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
3527 return false;
3528
3529 // The extracts must extract the two halves of the source.
3530 if (V1.getConstantOperandVal(1) != 0 ||
3531 V2.getConstantOperandVal(1) != Mask.size())
3532 return false;
3533
3534 // First index must be the first even or odd element from V1.
3535 if (Mask[0] != 0 && Mask[0] != 1)
3536 return false;
3537
3538 // The others must increase by 2 each time.
3539 // TODO: Support undef elements?
3540 for (unsigned i = 1; i != Mask.size(); ++i)
3541 if (Mask[i] != Mask[i - 1] + 2)
3542 return false;
3543
3544 return true;
3545}
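// Example of a mask accepted above (illustrative): with a v16i8 source t11,
// the two extracts are t34 = t11[0..7] and t33 = t11[8..15].  The even case
// (a) uses mask <0,2,4,6,8,10,12,14>: Mask[0] == 0 and every later index is
// the previous one plus 2, so the shuffle selects t11[0], t11[2], ...,
// t11[14].  The odd case (b) is identical except that it starts at 1.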
3546
3547/// Is this shuffle interleaving contiguous elements from one vector into the
3548/// even elements and contiguous elements from another vector into the odd
3549/// elements. \p EvenSrc will contain the element that should be in the first
3550/// even element. \p OddSrc will contain the element that should be in the first
3551/// odd element. These can be the first element in a source or the element half
3552/// way through the source.
3553static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
3554 int &OddSrc, const RISCVSubtarget &Subtarget) {
3555 // We need to be able to widen elements to the next larger integer type.
3556 if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3557 return false;
3558
3559 int Size = Mask.size();
3560 int NumElts = VT.getVectorNumElements();
3561 assert(Size == (int)NumElts && "Unexpected mask size");
3562
3563 SmallVector<unsigned, 2> StartIndexes;
3564 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
3565 return false;
3566
3567 EvenSrc = StartIndexes[0];
3568 OddSrc = StartIndexes[1];
3569
3570 // One source should be low half of first vector.
3571 if (EvenSrc != 0 && OddSrc != 0)
3572 return false;
3573
3574  // Subvectors will be extracted from either the start of the two input
3575  // vectors, or from the start and middle of the first vector if it's a unary
3576  // interleave.
3577 // In both cases, HalfNumElts will be extracted.
3578 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
3579 // we'll create an illegal extract_subvector.
3580 // FIXME: We could support other values using a slidedown first.
3581 int HalfNumElts = NumElts / 2;
3582 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
3583}
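// Example of a mask accepted above (illustrative): with NumElts == 8, the
// mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low half of the first
// source (EvenSrc == 0) with the low half of the second source
// (OddSrc == 8).  Both start indices are multiples of HalfNumElts == 4, so
// the extract_subvectors built by the caller are legal.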
3584
3585/// Match shuffles that concatenate two vectors, rotate the concatenation,
3586/// and then extract the original number of elements from the rotated result.
3587/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
3588/// returned rotation amount is for a rotate right, where elements move from
3589/// higher elements to lower elements. \p LoSrc indicates the first source
3590/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
3591/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
3592/// 0 or 1 if a rotation is found.
3593///
3594/// NOTE: We talk about rotate to the right which matches how bit shift and
3595/// rotate instructions are described where LSBs are on the right, but LLVM IR
3596/// and the table below write vectors with the lowest elements on the left.
3597static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
3598 int Size = Mask.size();
3599
3600 // We need to detect various ways of spelling a rotation:
3601 // [11, 12, 13, 14, 15, 0, 1, 2]
3602 // [-1, 12, 13, 14, -1, -1, 1, -1]
3603 // [-1, -1, -1, -1, -1, -1, 1, 2]
3604 // [ 3, 4, 5, 6, 7, 8, 9, 10]
3605 // [-1, 4, 5, 6, -1, -1, 9, -1]
3606 // [-1, 4, 5, 6, -1, -1, -1, -1]
3607 int Rotation = 0;
3608 LoSrc = -1;
3609 HiSrc = -1;
3610 for (int i = 0; i != Size; ++i) {
3611 int M = Mask[i];
3612 if (M < 0)
3613 continue;
3614
3615 // Determine where a rotate vector would have started.
3616 int StartIdx = i - (M % Size);
3617 // The identity rotation isn't interesting, stop.
3618 if (StartIdx == 0)
3619 return -1;
3620
3621    // If we found the tail of a vector, the rotation must be the missing
3622    // front. If we found the head of a vector, the rotation must be how much
3623    // of the head is present.
3624 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
3625
3626 if (Rotation == 0)
3627 Rotation = CandidateRotation;
3628 else if (Rotation != CandidateRotation)
3629 // The rotations don't match, so we can't match this mask.
3630 return -1;
3631
3632 // Compute which value this mask is pointing at.
3633 int MaskSrc = M < Size ? 0 : 1;
3634
3635 // Compute which of the two target values this index should be assigned to.
3636    // This reflects whether the high elements are remaining or the low elements
3637 // are remaining.
3638 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
3639
3640 // Either set up this value if we've not encountered it before, or check
3641 // that it remains consistent.
3642 if (TargetSrc < 0)
3643 TargetSrc = MaskSrc;
3644 else if (TargetSrc != MaskSrc)
3645 // This may be a rotation, but it pulls from the inputs in some
3646 // unsupported interleaving.
3647 return -1;
3648 }
3649
3650 // Check that we successfully analyzed the mask, and normalize the results.
3651 assert(Rotation != 0 && "Failed to locate a viable rotation!");
3652 assert((LoSrc >= 0 || HiSrc >= 0) &&
3653 "Failed to find a rotated input vector!");
3654
3655 return Rotation;
3656}
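// Worked example (illustrative), writing the sources as v0..v7 and w0..w7:
// the first mask above, [11, 12, 13, 14, 15, 0, 1, 2] with Size == 8, gives
// StartIdx = 0 - (11 % 8) = -3 at element 0, so Rotation = 3 and HiSrc = 1;
// element 5 gives StartIdx = 5 and Size - StartIdx = 3 again, with
// LoSrc = 0.  The caller then slides the second source down by 3 and the
// first source up by 5, producing [w3, w4, w5, w6, w7, v0, v1, v2].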
3657
3658// Lower a deinterleave shuffle to vnsrl.
3659// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
3660// -> [p, q, r, s] (EvenElts == false)
3661// VT is the type of the vector to return, <[vscale x ]n x ty>
3662// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
3663static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
3664                                       bool EvenElts,
3665 const RISCVSubtarget &Subtarget,
3666 SelectionDAG &DAG) {
3667 // The result is a vector of type <m x n x ty>
3668 MVT ContainerVT = VT;
3669 // Convert fixed vectors to scalable if needed
3670 if (ContainerVT.isFixedLengthVector()) {
3671 assert(Src.getSimpleValueType().isFixedLengthVector());
3672 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
3673
3674 // The source is a vector of type <m x n*2 x ty>
3675 MVT SrcContainerVT =
3676        MVT::getVectorVT(ContainerVT.getVectorElementType(),
3677                         ContainerVT.getVectorElementCount() * 2);
3678 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3679 }
3680
3681 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3682
3683 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
3684 // This also converts FP to int.
3685 unsigned EltBits = ContainerVT.getScalarSizeInBits();
3686 MVT WideSrcContainerVT = MVT::getVectorVT(
3687 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
3688 Src = DAG.getBitcast(WideSrcContainerVT, Src);
3689
3690 // The integer version of the container type.
3691 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
3692
3693 // If we want even elements, then the shift amount is 0. Otherwise, shift by
3694 // the original element size.
3695 unsigned Shift = EvenElts ? 0 : EltBits;
3696 SDValue SplatShift = DAG.getNode(
3697 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
3698 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
3699 SDValue Res =
3700 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
3701 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
3702 // Cast back to FP if needed.
3703 Res = DAG.getBitcast(ContainerVT, Res);
3704
3705 if (VT.isFixedLengthVector())
3706 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
3707 return Res;
3708}
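// Why the bitcast + vnsrl works (illustrative, little-endian element
// layout): for <8 x i8> [a, p, b, q, c, r, d, s], the bitcast to <4 x i16>
// packs each pair into one wide element with the even byte in the low half
// and the odd byte in the high half.  A narrowing shift right by 0 then
// keeps the low bytes [a, b, c, d], while a shift by EltBits == 8 keeps the
// high bytes [p, q, r, s].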
3709
3710// Lower the following shuffle to vslidedown.
3711// a)
3712// t49: v8i8 = extract_subvector t13, Constant:i64<0>
3713// t109: v8i8 = extract_subvector t13, Constant:i64<8>
3714// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
3715// b)
3716// t69: v16i16 = extract_subvector t68, Constant:i64<0>
3717// t23: v8i16 = extract_subvector t69, Constant:i64<0>
3718// t29: v4i16 = extract_subvector t23, Constant:i64<4>
3719// t26: v8i16 = extract_subvector t69, Constant:i64<8>
3720// t30: v4i16 = extract_subvector t26, Constant:i64<0>
3721// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
3722static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
3723                                               SDValue V1, SDValue V2,
3724 ArrayRef<int> Mask,
3725 const RISCVSubtarget &Subtarget,
3726 SelectionDAG &DAG) {
3727 auto findNonEXTRACT_SUBVECTORParent =
3728 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
3729 uint64_t Offset = 0;
3730 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
3731 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
3732           // a scalable vector, but we don't want to match that case.
3733 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
3734 Offset += Parent.getConstantOperandVal(1);
3735 Parent = Parent.getOperand(0);
3736 }
3737 return std::make_pair(Parent, Offset);
3738 };
3739
3740 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
3741 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
3742
3743 // Extracting from the same source.
3744 SDValue Src = V1Src;
3745 if (Src != V2Src)
3746 return SDValue();
3747
3748 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
3749 SmallVector<int, 16> NewMask(Mask);
3750 for (size_t i = 0; i != NewMask.size(); ++i) {
3751 if (NewMask[i] == -1)
3752 continue;
3753
3754 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
3755 NewMask[i] = NewMask[i] + V1IndexOffset;
3756 } else {
3757 // Minus NewMask.size() is needed. Otherwise, the b case would be
3758 // <5,6,7,12> instead of <5,6,7,8>.
3759 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
3760 }
3761 }
3762
3763 // First index must be known and non-zero. It will be used as the slidedown
3764 // amount.
3765 if (NewMask[0] <= 0)
3766 return SDValue();
3767
3768  // NewMask must also be continuous, i.e. each index one larger than the last.
3769 for (unsigned i = 1; i != NewMask.size(); ++i)
3770 if (NewMask[i - 1] + 1 != NewMask[i])
3771 return SDValue();
3772
3773 MVT XLenVT = Subtarget.getXLenVT();
3774 MVT SrcVT = Src.getSimpleValueType();
3775 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3776 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3777 SDValue Slidedown =
3778 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3779 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
3780 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
3781 return DAG.getNode(
3782      ISD::EXTRACT_SUBVECTOR, DL, VT,
3783      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
3784 DAG.getConstant(0, DL, XLenVT));
3785}
3786
3787// Because vslideup leaves the destination elements at the start intact, we can
3788// use it to perform shuffles that insert subvectors:
3789//
3790// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
3791// ->
3792// vsetvli zero, 8, e8, mf2, ta, ma
3793// vslideup.vi v8, v9, 4
3794//
3795// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
3796// ->
3797// vsetvli zero, 5, e8, mf2, tu, ma
3798// vslideup.vi v8, v9, 2
3799static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
3800                                             SDValue V1, SDValue V2,
3801 ArrayRef<int> Mask,
3802 const RISCVSubtarget &Subtarget,
3803 SelectionDAG &DAG) {
3804 unsigned NumElts = VT.getVectorNumElements();
3805 int NumSubElts, Index;
3806 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
3807 Index))
3808 return SDValue();
3809
3810 bool OpsSwapped = Mask[Index] < (int)NumElts;
3811 SDValue InPlace = OpsSwapped ? V2 : V1;
3812 SDValue ToInsert = OpsSwapped ? V1 : V2;
3813
3814 MVT XLenVT = Subtarget.getXLenVT();
3815 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3816 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
3817 // We slide up by the index that the subvector is being inserted at, and set
3818  // VL to the index + the number of elements being inserted.
3819  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
3820  // If we're adding a suffix to the in-place vector, i.e. inserting right
3821 // up to the very end of it, then we don't actually care about the tail.
3822 if (NumSubElts + Index >= (int)NumElts)
3823 Policy |= RISCVII::TAIL_AGNOSTIC;
3824
3825 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
3826 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
3827 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
3828
3829 SDValue Res;
3830 // If we're inserting into the lowest elements, use a tail undisturbed
3831 // vmv.v.v.
3832 if (Index == 0)
3833 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
3834 VL);
3835 else
3836 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
3837 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
3838 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3839}
3840
3841/// Match v(f)slide1up/down idioms. These operations involve sliding
3842/// N-1 elements to make room for an inserted scalar at one end.
3843static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
3844                                            SDValue V1, SDValue V2,
3845 ArrayRef<int> Mask,
3846 const RISCVSubtarget &Subtarget,
3847 SelectionDAG &DAG) {
3848 bool OpsSwapped = false;
3849 if (!isa<BuildVectorSDNode>(V1)) {
3850 if (!isa<BuildVectorSDNode>(V2))
3851 return SDValue();
3852 std::swap(V1, V2);
3853 OpsSwapped = true;
3854 }
3855 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
3856 if (!Splat)
3857 return SDValue();
3858
3859 // Return true if the mask could describe a slide of Mask.size() - 1
3860 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
3861 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
3862 const unsigned S = (Offset > 0) ? 0 : -Offset;
3863 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
3864 for (unsigned i = S; i != E; ++i)
3865 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
3866 return false;
3867 return true;
3868 };
3869
3870 const unsigned NumElts = VT.getVectorNumElements();
3871 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
3872 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
3873 return SDValue();
3874
3875 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
3876  // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
3877 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
3878 return SDValue();
3879
3880 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3881 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3882 auto OpCode = IsVSlidedown ?
3883    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
3884    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
3885  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
3886 DAG.getUNDEF(ContainerVT),
3887 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
3888 Splat, TrueMask, VL);
3889 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3890}
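// Example (illustrative): with NumElts == 4, V1 a build_vector splat of a
// scalar s and mask <0, 4, 5, 6>, elements 1..3 take V2[0..2], so
// isSlideMask(Mask, 4, -1) holds and the inserted lane Mask[0] == 0 comes
// from the splat.  The shuffle is lowered to vslide1up.vx (vfslide1up.vf
// for FP) of V2 by the scalar s, giving [s, V2[0], V2[1], V2[2]].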
3891
3892// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
3893// to create an interleaved vector of <[vscale x] n*2 x ty>.
3894// This requires that the size of ty is less than the subtarget's maximum ELEN.
3895static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
3896                                     const SDLoc &DL, SelectionDAG &DAG,
3897 const RISCVSubtarget &Subtarget) {
3898 MVT VecVT = EvenV.getSimpleValueType();
3899 MVT VecContainerVT = VecVT; // <vscale x n x ty>
3900 // Convert fixed vectors to scalable if needed
3901 if (VecContainerVT.isFixedLengthVector()) {
3902 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
3903 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
3904 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
3905 }
3906
3907 assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
3908
3909 // We're working with a vector of the same size as the resulting
3910 // interleaved vector, but with half the number of elements and
3911 // twice the SEW (Hence the restriction on not using the maximum
3912 // ELEN)
3913 MVT WideVT =
3914      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
3915                       VecVT.getVectorElementCount());
3916 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
3917 if (WideContainerVT.isFixedLengthVector())
3918 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
3919
3920 // Bitcast the input vectors to integers in case they are FP
3921 VecContainerVT = VecContainerVT.changeTypeToInteger();
3922 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
3923 OddV = DAG.getBitcast(VecContainerVT, OddV);
3924
3925 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
3926 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
3927
3928 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
3929 // vwaddu.vv
3930 SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
3931 EvenV, OddV, Passthru, Mask, VL);
3932
3933  // Then multiply OddV by (2^VecVT.getScalarSizeInBits()) - 1, i.e. all ones
3934 SDValue AllOnesVec = DAG.getSplatVector(
3935 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
3936 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
3937 AllOnesVec, Passthru, Mask, VL);
3938
3939 // Add the two together so we get
3940 // (OddV * 0xff...ff) + (OddV + EvenV)
3941 // = (OddV * 0x100...00) + EvenV
3942 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
3943  // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
3944 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
3945 OddsMul, Passthru, Mask, VL);
3946
3947 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
3948 MVT ResultContainerVT = MVT::getVectorVT(
3949 VecVT.getVectorElementType(), // Make sure to use original type
3950 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
3951 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
3952
3953 // Convert back to a fixed vector if needed
3954 MVT ResultVT =
3955      MVT::getVectorVT(VecVT.getVectorElementType(),
3956                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
3957  if (ResultVT.isFixedLengthVector())
3958 Interleaved =
3959 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
3960
3961 return Interleaved;
3962}
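// Worked example of the arithmetic above (illustrative), with SEW == 8: for
// an even element a and an odd element p, vwaddu.vv produces the 16-bit
// value a + p, and adding p * 0xff (the VWMULU_VL by all-ones plus ADD_VL,
// i.e. vwmaccu.vx) gives a + 256 * p == (p << 8) | a, which is exactly the
// pair [a, p] viewed as a single 16-bit element before the final bitcast.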
3963
3964static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
3965                                   const RISCVSubtarget &Subtarget) {
3966 SDValue V1 = Op.getOperand(0);
3967 SDValue V2 = Op.getOperand(1);
3968 SDLoc DL(Op);
3969 MVT XLenVT = Subtarget.getXLenVT();
3970 MVT VT = Op.getSimpleValueType();
3971 unsigned NumElts = VT.getVectorNumElements();
3972  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3973
3974 // Promote i1 shuffle to i8 shuffle.
3975 if (VT.getVectorElementType() == MVT::i1) {
3976 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
3977 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
3978 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
3979 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
3980 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
3981 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
3982 ISD::SETNE);
3983 }
3984
3985 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3986
3987 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3988
3989 if (SVN->isSplat()) {
3990 const int Lane = SVN->getSplatIndex();
3991 if (Lane >= 0) {
3992 MVT SVT = VT.getVectorElementType();
3993
3994 // Turn splatted vector load into a strided load with an X0 stride.
3995 SDValue V = V1;
3996 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
3997 // with undef.
3998 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
3999 int Offset = Lane;
4000 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4001 int OpElements =
4002 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4003 V = V.getOperand(Offset / OpElements);
4004 Offset %= OpElements;
4005 }
4006
4007 // We need to ensure the load isn't atomic or volatile.
4008 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4009 auto *Ld = cast<LoadSDNode>(V);
4010 Offset *= SVT.getStoreSize();
4011 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
4012                                                     TypeSize::Fixed(Offset), DL);
4013
4014 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4015 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4016 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4017 SDValue IntID =
4018 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4019 SDValue Ops[] = {Ld->getChain(),
4020 IntID,
4021 DAG.getUNDEF(ContainerVT),
4022 NewAddr,
4023 DAG.getRegister(RISCV::X0, XLenVT),
4024 VL};
4025 SDValue NewLoad = DAG.getMemIntrinsicNode(
4026 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4027              DAG.getMachineFunction().getMachineMemOperand(
4028                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4029 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4030 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4031 }
4032
4033 // Otherwise use a scalar load and splat. This will give the best
4034 // opportunity to fold a splat into the operation. ISel can turn it into
4035 // the x0 strided load if we aren't able to fold away the select.
4036 if (SVT.isFloatingPoint())
4037 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4038 Ld->getPointerInfo().getWithOffset(Offset),
4039 Ld->getOriginalAlign(),
4040 Ld->getMemOperand()->getFlags());
4041 else
4042 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4043 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4044 Ld->getOriginalAlign(),
4045 Ld->getMemOperand()->getFlags());
4047
4048 unsigned Opc =
4049          SVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4050      SDValue Splat =
4051 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4052 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4053 }
4054
4055 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4056 assert(Lane < (int)NumElts && "Unexpected lane!");
4057 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4058 V1, DAG.getConstant(Lane, DL, XLenVT),
4059 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4060 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4061 }
4062 }
4063
4064 ArrayRef<int> Mask = SVN->getMask();
4065
4066 if (SDValue V =
4067 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4068 return V;
4069
4070 if (SDValue V =
4071 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4072 return V;
4073
4074 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4075 // be undef which can be handled with a single SLIDEDOWN/UP.
4076 int LoSrc, HiSrc;
4077 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4078 if (Rotation > 0) {
4079 SDValue LoV, HiV;
4080 if (LoSrc >= 0) {
4081 LoV = LoSrc == 0 ? V1 : V2;
4082 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4083 }
4084 if (HiSrc >= 0) {
4085 HiV = HiSrc == 0 ? V1 : V2;
4086 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
4087 }
4088
4089 // We found a rotation. We need to slide HiV down by Rotation. Then we need
4090 // to slide LoV up by (NumElts - Rotation).
4091 unsigned InvRotate = NumElts - Rotation;
4092
4093 SDValue Res = DAG.getUNDEF(ContainerVT);
4094 if (HiV) {
4095      // Even though we could use a smaller VL, don't, to avoid a vsetivli
4096 // toggle.
4097 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
4098 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
4099 }
4100 if (LoV)
4101 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
4102 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
4103                      RISCVII::TAIL_AGNOSTIC);
4104
4105 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4106 }
4107
4108 // If this is a deinterleave and we can widen the vector, then we can use
4109 // vnsrl to deinterleave.
4110 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
4111 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
4112 Subtarget, DAG);
4113 }
4114
4115 if (SDValue V =
4116 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
4117 return V;
4118
4119 // Detect an interleave shuffle and lower to
4120 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
4121 int EvenSrc, OddSrc;
4122 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
4123 // Extract the halves of the vectors.
4124 MVT HalfVT = VT.getHalfNumVectorElementsVT();
4125
4126 int Size = Mask.size();
4127 SDValue EvenV, OddV;
4128 assert(EvenSrc >= 0 && "Undef source?");
4129 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
4130 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
4131 DAG.getConstant(EvenSrc % Size, DL, XLenVT));
4132
4133 assert(OddSrc >= 0 && "Undef source?");
4134 OddV = (OddSrc / Size) == 0 ? V1 : V2;
4135 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
4136 DAG.getConstant(OddSrc % Size, DL, XLenVT));
4137
4138 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
4139 }
4140
4141 // Detect shuffles which can be re-expressed as vector selects; these are
4142 // shuffles in which each element in the destination is taken from an element
4143  // at the corresponding index in either source vector.
4144 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
4145 int MaskIndex = MaskIdx.value();
4146 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
4147 });
4148
4149 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4150
4151 SmallVector<SDValue> MaskVals;
4152 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4153 // merged with a second vrgather.
4154 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4155
4156 // By default we preserve the original operand order, and use a mask to
4157 // select LHS as true and RHS as false. However, since RVV vector selects may
4158 // feature splats but only on the LHS, we may choose to invert our mask and
4159 // instead select between RHS and LHS.
4160 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4161 bool InvertMask = IsSelect == SwapOps;
4162
4163  // Keep track of which non-undef indices are used by each LHS/RHS shuffle
4164 // half.
4165 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4166
4167 // Now construct the mask that will be used by the vselect or blended
4168 // vrgather operation. For vrgathers, construct the appropriate indices into
4169 // each vector.
4170 for (int MaskIndex : Mask) {
4171 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
4172 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4173 if (!IsSelect) {
4174 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4175 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4176 ? DAG.getConstant(MaskIndex, DL, XLenVT)
4177 : DAG.getUNDEF(XLenVT));
4178 GatherIndicesRHS.push_back(
4179 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4180 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4181 if (IsLHSOrUndefIndex && MaskIndex >= 0)
4182 ++LHSIndexCounts[MaskIndex];
4183 if (!IsLHSOrUndefIndex)
4184 ++RHSIndexCounts[MaskIndex - NumElts];
4185 }
4186 }
4187
4188 if (SwapOps) {
4189 std::swap(V1, V2);
4190 std::swap(GatherIndicesLHS, GatherIndicesRHS);
4191 }
4192
4193 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4194 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4195 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4196
4197 if (IsSelect)
4198 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4199
4200 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4201 // On such a large vector we're unable to use i8 as the index type.
4202 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4203 // may involve vector splitting if we're already at LMUL=8, or our
4204 // user-supplied maximum fixed-length LMUL.
4205 return SDValue();
4206 }
4207
4208 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
4209 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
4210 MVT IndexVT = VT.changeTypeToInteger();
4211 // Since we can't introduce illegal index types at this stage, use i16 and
4212 // vrgatherei16 if the corresponding index type for plain vrgather is greater
4213 // than XLenVT.
4214 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
4215 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4216 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4217 }
4218
4219 MVT IndexContainerVT =
4220 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
4221
4222 SDValue Gather;
4223 // TODO: This doesn't trigger for i64 vectors on RV32, since there we
4224 // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
4225 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
4226 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
4227 Subtarget);
4228 } else {
4229 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4230 // If only one index is used, we can use a "splat" vrgather.
4231 // TODO: We can splat the most-common index and fix-up any stragglers, if
4232 // that's beneficial.
4233 if (LHSIndexCounts.size() == 1) {
4234 int SplatIndex = LHSIndexCounts.begin()->getFirst();
4235 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
4236 DAG.getConstant(SplatIndex, DL, XLenVT),
4237 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4238 } else {
4239 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
4240 LHSIndices =
4241 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
4242
4243 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
4244 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4245 }
4246 }
4247
4248 // If a second vector operand is used by this shuffle, blend it in with an
4249 // additional vrgather.
4250 if (!V2.isUndef()) {
4251 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4252
4253 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4254 SelectMask =
4255 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
4256
4257 // If only one index is used, we can use a "splat" vrgather.
4258 // TODO: We can splat the most-common index and fix-up any stragglers, if
4259 // that's beneficial.
4260 if (RHSIndexCounts.size() == 1) {
4261 int SplatIndex = RHSIndexCounts.begin()->getFirst();
4262 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
4263 DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
4264 SelectMask, VL);
4265 } else {
4266 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
4267 RHSIndices =
4268 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
4269 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
4270 SelectMask, VL);
4271 }
4272 }
4273
4274 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4275}
4276
4277bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
4278  // Support splats for any type. These should type legalize well.
4279 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
4280 return true;
4281
4282 // Only support legal VTs for other shuffles for now.
4283 if (!isTypeLegal(VT))
4284 return false;
4285
4286 MVT SVT = VT.getSimpleVT();
4287
4288 // Not for i1 vectors.
4289 if (SVT.getScalarType() == MVT::i1)
4290 return false;
4291
4292 int Dummy1, Dummy2;
4293 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
4294 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
4295}
4296
4297// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
4298// the exponent.
4299SDValue
4300RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
4301 SelectionDAG &DAG) const {
4302 MVT VT = Op.getSimpleValueType();
4303 unsigned EltSize = VT.getScalarSizeInBits();
4304 SDValue Src = Op.getOperand(0);
4305 SDLoc DL(Op);
4306 MVT ContainerVT = VT;
4307
4308 SDValue Mask, VL;
4309 if (Op->isVPOpcode()) {
4310 Mask = Op.getOperand(1);
4311 if (VT.isFixedLengthVector())
4312 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
4313 Subtarget);
4314 VL = Op.getOperand(2);
4315 }
4316
4317  // We choose an FP type that can represent the value if possible. Otherwise,
4318  // we use a round-towards-zero conversion to get a correct result exponent.
4319 // TODO: Use f16 for i8 when possible?
4320 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
4321 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
4322 FloatEltVT = MVT::f32;
4323 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
4324
4325 // Legal types should have been checked in the RISCVTargetLowering
4326 // constructor.
4327 // TODO: Splitting may make sense in some cases.
4328 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
4329 "Expected legal float type!");
4330
4331 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
4332 // The trailing zero count is equal to log2 of this single bit value.
4333 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
4334 SDValue Neg = DAG.getNegative(Src, DL, VT);
4335 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
4336 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
4337 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
4338 Src, Mask, VL);
4339 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
4340 }
4341
4342 // We have a legal FP type, convert to it.
4343 SDValue FloatVal;
4344 if (FloatVT.bitsGT(VT)) {
4345 if (Op->isVPOpcode())
4346 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
4347 else
4348 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
4349 } else {
4350 // Use RTZ to avoid rounding influencing exponent of FloatVal.
4351 if (VT.isFixedLengthVector()) {
4352 ContainerVT = getContainerForFixedLengthVector(VT);
4353 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4354 }
4355 if (!Op->isVPOpcode())
4356 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4357    SDValue RTZRM =
4358        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
4359 MVT ContainerFloatVT =
4360 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
4361 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
4362 Src, Mask, RTZRM, VL);
4363 if (VT.isFixedLengthVector())
4364 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
4365 }
4366 // Bitcast to integer and shift the exponent to the LSB.
4367 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
4368 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
4369 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
4370
4371 SDValue Exp;
4372 // Restore back to original type. Truncation after SRL is to generate vnsrl.
4373 if (Op->isVPOpcode()) {
4374 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
4375 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
4376 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
4377 } else {
4378 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
4379 DAG.getConstant(ShiftAmt, DL, IntVT));
4380 if (IntVT.bitsLT(VT))
4381 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
4382 else if (IntVT.bitsGT(VT))
4383 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
4384 }
4385
4386 // The exponent contains log2 of the value in biased form.
4387 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
4388 // For trailing zeros, we just need to subtract the bias.
4389 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
4390 return DAG.getNode(ISD::SUB, DL, VT, Exp,
4391 DAG.getConstant(ExponentBias, DL, VT));
4392 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
4393 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
4394 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
4395
4396 // For leading zeros, we need to remove the bias and convert from log2 to
4397 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
4398 unsigned Adjust = ExponentBias + (EltSize - 1);
4399 SDValue Res;
4400 if (Op->isVPOpcode())
4401 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
4402 Mask, VL);
4403 else
4404 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
4405
4406  // The above result with zero input equals Adjust, which is greater than
4407 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
4408 if (Op.getOpcode() == ISD::CTLZ)
4409 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
4410 else if (Op.getOpcode() == ISD::VP_CTLZ)
4411 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
4412 DAG.getConstant(EltSize, DL, VT), Mask, VL);
4413 return Res;
4414}
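// Worked example (illustrative): for cttz_zero_undef(i32 40), 40 & -40 == 8
// and uitofp(8) to f32 has exponent field 130, so 130 - 127 == 3 trailing
// zeros.  For ctlz_zero_undef(i32 40), uitofp(40) has exponent field 132,
// and Adjust - 132 == (127 + 31) - 132 == 26 leading zeros.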
4415
4416// While RVV has alignment restrictions, we should always be able to load as a
4417// legal equivalently-sized byte-typed vector instead. This method is
4418// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
4419// the load is already correctly-aligned, it returns SDValue().
4420SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
4421 SelectionDAG &DAG) const {
4422 auto *Load = cast<LoadSDNode>(Op);
4423 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
4424
4425  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4426                                     Load->getMemoryVT(),
4427 *Load->getMemOperand()))
4428 return SDValue();
4429
4430 SDLoc DL(Op);
4431 MVT VT = Op.getSimpleValueType();
4432 unsigned EltSizeBits = VT.getScalarSizeInBits();
4433 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
4434 "Unexpected unaligned RVV load type");
4435 MVT NewVT =
4436 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
4437 assert(NewVT.isValid() &&
4438 "Expecting equally-sized RVV vector types to be legal");
4439 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
4440 Load->getPointerInfo(), Load->getOriginalAlign(),
4441 Load->getMemOperand()->getFlags());
4442 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
4443}
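// Example (illustrative): an unaligned load of <4 x i32> is re-expressed as
// a <16 x i8> load, which only needs byte alignment, followed by a bitcast
// back to <4 x i32>; the chain result is preserved via getMergeValues.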
4444
4445// While RVV has alignment restrictions, we should always be able to store as a
4446// legal equivalently-sized byte-typed vector instead. This method is
4447// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
4448// returns SDValue() if the store is already correctly aligned.
4449SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
4450 SelectionDAG &DAG) const {
4451 auto *Store = cast<StoreSDNode>(Op);
4452 assert(Store && Store->getValue().getValueType().isVector() &&
4453 "Expected vector store");
4454
4455  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4456                                     Store->getMemoryVT(),
4457 *Store->getMemOperand()))
4458 return SDValue();
4459
4460 SDLoc DL(Op);
4461 SDValue StoredVal = Store->getValue();
4462 MVT VT = StoredVal.getSimpleValueType();
4463 unsigned EltSizeBits = VT.getScalarSizeInBits();
4464 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
4465 "Unexpected unaligned RVV store type");
4466 MVT NewVT =
4467 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
4468 assert(NewVT.isValid() &&
4469 "Expecting equally-sized RVV vector types to be legal");
4470 StoredVal = DAG.getBitcast(NewVT, StoredVal);
4471 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
4472 Store->getPointerInfo(), Store->getOriginalAlign(),
4473 Store->getMemOperand()->getFlags());
4474}
4475
4476static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
4477                             const RISCVSubtarget &Subtarget) {
4478 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
4479
4480 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
4481
4482 // All simm32 constants should be handled by isel.
4483 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
4484 // this check redundant, but small immediates are common so this check
4485 // should have better compile time.
4486 if (isInt<32>(Imm))
4487 return Op;
4488
4489 // We only need to cost the immediate, if constant pool lowering is enabled.
4490 if (!Subtarget.useConstantPoolForLargeInts())
4491 return Op;
4492
4493  RISCVMatInt::InstSeq Seq =
4494      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
4495 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
4496 return Op;
4497
4498 // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do
4499 // that if it will avoid a constant pool.
4500 // It will require an extra temporary register though.
4501 if (!DAG.shouldOptForSize()) {
4502 int64_t LoVal = SignExtend64<32>(Imm);
4503 int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
4504 if (LoVal == HiVal) {
4505 RISCVMatInt::InstSeq SeqLo =
4506 RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
4507 if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
4508 return Op;
4509 }
4510 }
4511
4512 // Expand to a constant pool using the default expansion code.
4513 return SDValue();
4514}
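// Example of the special case above (illustrative): 0x0000000500000005 has
// LoVal == HiVal == 5, so instead of a constant-pool load it can be
// materialized as the (ADD (SLLI X, 32), X) pattern, e.g.
//   li   a0, 5
//   slli a1, a0, 32
//   add  a0, a0, a1
// at the cost of one extra temporary register (register names illustrative).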
4515
4516static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4517                                 const RISCVSubtarget &Subtarget) {
4518 SDLoc dl(Op);
4519 AtomicOrdering FenceOrdering =
4520 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4521 SyncScope::ID FenceSSID =
4522 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4523
4524 if (Subtarget.hasStdExtZtso()) {
4525 // The only fence that needs an instruction is a sequentially-consistent
4526 // cross-thread fence.
4527 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4528 FenceSSID == SyncScope::System)
4529 return Op;
4530
4531 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4532 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
4533 }
4534
4535 // singlethread fences only synchronize with signal handlers on the same
4536 // thread and thus only need to preserve instruction order, not actually
4537 // enforce memory ordering.
4538 if (FenceSSID == SyncScope::SingleThread)
4539 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4540 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
4541
4542 return Op;
4543}
4544
4545SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
4546 SelectionDAG &DAG) const {
4547 SDLoc DL(Op);
4548 MVT VT = Op.getSimpleValueType();
4549 MVT XLenVT = Subtarget.getXLenVT();
4550 auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
4551 unsigned Check = CNode->getZExtValue();
4552 unsigned TDCMask = 0;
4553 if (Check & fcSNan)
4554 TDCMask |= RISCV::FPMASK_Signaling_NaN;
4555 if (Check & fcQNan)
4556 TDCMask |= RISCV::FPMASK_Quiet_NaN;
4557  if (Check & fcPosInf)
4558    TDCMask |= RISCV::FPMASK_Positive_Infinity;
4559  if (Check & fcNegInf)
4560    TDCMask |= RISCV::FPMASK_Negative_Infinity;
4561  if (Check & fcPosNormal)
4562    TDCMask |= RISCV::FPMASK_Positive_Normal;
4563  if (Check & fcNegNormal)
4564    TDCMask |= RISCV::FPMASK_Negative_Normal;
4565  if (Check & fcPosSubnormal)
4566    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
4567  if (Check & fcNegSubnormal)
4568    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
4569 if (Check & fcPosZero)
4570 TDCMask |= RISCV::FPMASK_Positive_Zero;
4571 if (Check & fcNegZero)
4572 TDCMask |= RISCV::FPMASK_Negative_Zero;
4573
4574 bool IsOneBitMask = isPowerOf2_32(TDCMask);
4575
4576 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
4577
4578 if (VT.isVector()) {
4579 SDValue Op0 = Op.getOperand(0);
4580 MVT VT0 = Op.getOperand(0).getSimpleValueType();
4581
4582 if (VT.isScalableVector()) {
4583      MVT DstVT = VT0.changeVectorElementTypeToInteger();
4584      auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
4585 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
4586 VL, Op->getFlags());
4587 if (IsOneBitMask)
4588 return DAG.getSetCC(DL, VT, FPCLASS,
4589                            DAG.getConstant(TDCMask, DL, DstVT),
4590                            ISD::SETEQ);
4591 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
4592 DAG.getConstant(TDCMask, DL, DstVT));
4593 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
4594 ISD::SETNE);
4595 }
4596
4597 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
4598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
4599 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
4600 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
4601
4602 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
4603
4604 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
4605 Mask, VL, Op->getFlags());
4606
4607 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
4608 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
4609 if (IsOneBitMask) {
4610 SDValue VMSEQ =
4611 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
4612 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
4613 DAG.getUNDEF(ContainerVT), Mask, VL});
4614 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
4615 }
4616 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
4617 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
4618
4619 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
4620 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
4621 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
4622
4623 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
4624 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
4625 DAG.getUNDEF(ContainerVT), Mask, VL});
4626 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
4627 }
4628
4629 SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
4630 SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
4631 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
4632                      ISD::SETNE);
4633}
4634
4635// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
4636// operations propagate nans.
4637static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
4638                                      const RISCVSubtarget &Subtarget) {
4639 SDLoc DL(Op);
4640 EVT VT = Op.getValueType();
4641
4642 SDValue X = Op.getOperand(0);
4643 SDValue Y = Op.getOperand(1);
4644
4645 MVT XLenVT = Subtarget.getXLenVT();
4646
4647 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
4648 // ensures that when one input is a nan, the other will also be a nan allowing
4649 // the nan to propagate. If both inputs are nan, this will swap the inputs
4650 // which is harmless.
4651 // FIXME: Handle nonans FMF and use isKnownNeverNaN.
4652 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
4653 SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
4654
4655 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
4656 SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
4657
4658 unsigned Opc =
4659 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
4660 return DAG.getNode(Opc, DL, VT, NewX, NewY);
4661}
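// Worked example (illustrative): fmaximum(NaN, 1.0).  X == X is false for
// the NaN operand, so NewY becomes X (NaN); Y == Y is true, so NewX stays X
// (NaN).  FMAX then sees two NaNs and returns NaN, as fmaximum requires,
// whereas a bare fmax would have returned 1.0.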
4662
4663/// Get the RISC-V target-specific VL op for a given SDNode.
4664static unsigned getRISCVVLOp(SDValue Op) {
4665#define OP_CASE(NODE) \
4666 case ISD::NODE: \
4667 return RISCVISD::NODE##_VL;
4668 switch (Op.getOpcode()) {
4669 default:
4670 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
4671 // clang-format off
4672 OP_CASE(ADD)
4673 OP_CASE(SUB)
4674 OP_CASE(MUL)
4675 OP_CASE(MULHS)
4676 OP_CASE(MULHU)
4677 OP_CASE(SDIV)
4678 OP_CASE(SREM)
4679 OP_CASE(UDIV)
4680 OP_CASE(UREM)
4681 OP_CASE(SHL)
4682 OP_CASE(SRA)
4683 OP_CASE(SRL)
4684 OP_CASE(SADDSAT)
4685 OP_CASE(UADDSAT)
4686 OP_CASE(SSUBSAT)
4687 OP_CASE(USUBSAT)
4688 OP_CASE(FADD)
4689 OP_CASE(FSUB)
4690 OP_CASE(FMUL)
4691 OP_CASE(FDIV)
4692 OP_CASE(FNEG)
4693 OP_CASE(FABS)
4694 OP_CASE(FSQRT)
4695 OP_CASE(SMIN)
4696 OP_CASE(SMAX)
4697 OP_CASE(UMIN)
4698 OP_CASE(UMAX)
4699 OP_CASE(FMINNUM)
4700 OP_CASE(FMAXNUM)
4701 OP_CASE(STRICT_FADD)
4702 OP_CASE(STRICT_FSUB)
4703 OP_CASE(STRICT_FMUL)
4704 OP_CASE(STRICT_FDIV)
4705 OP_CASE(STRICT_FSQRT)
4706 // clang-format on
4707#undef OP_CASE
4708 case ISD::FMA:
4709 return RISCVISD::VFMADD_VL;
4710 case ISD::STRICT_FMA:
4711    return RISCVISD::STRICT_VFMADD_VL;
4712  case ISD::AND:
4713 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4714 return RISCVISD::VMAND_VL;
4715 return RISCVISD::AND_VL;
4716 case ISD::OR:
4717 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4718 return RISCVISD::VMOR_VL;
4719 return RISCVISD::OR_VL;
4720 case ISD::XOR:
4721 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4722 return RISCVISD::VMXOR_VL;
4723 return RISCVISD::XOR_VL;
4724 }
4725}
4726
4727/// Return true if a RISC-V target-specific op has a merge operand.
4728static bool hasMergeOp(unsigned Opcode) {
4729 assert(Opcode > RISCVISD::FIRST_NUMBER &&
4731 "not a RISC-V target specific op");
4733 "adding target specific op should update this function");
4734 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL)
4735 return true;
4736 if (Opcode == RISCVISD::FCOPYSIGN_VL)
4737 return true;
4738 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
4739 return true;
4740 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
4741 return true;
4742 return false;
4743}
4744
4745/// Return true if a RISC-V target-specific op has a mask operand.
4746static bool hasMaskOp(unsigned Opcode) {
4747 assert(Opcode > RISCVISD::FIRST_NUMBER &&
4749 "not a RISC-V target specific op");
4751 "adding target specific op should update this function");
4752 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
4753 return true;
4754 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
4755 return true;
4756 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
4758 return true;
4759 return false;
4760}
4761
4762SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
4763                                            SelectionDAG &DAG) const {
4764 switch (Op.getOpcode()) {
4765 default:
4766 report_fatal_error("unimplemented operand");
4767 case ISD::ATOMIC_FENCE:
4768 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
4769 case ISD::GlobalAddress:
4770 return lowerGlobalAddress(Op, DAG);
4771 case ISD::BlockAddress:
4772 return lowerBlockAddress(Op, DAG);
4773 case ISD::ConstantPool:
4774 return lowerConstantPool(Op, DAG);
4775 case ISD::JumpTable:
4776 return lowerJumpTable(Op, DAG);
4777  case ISD::GlobalTLSAddress:
4778    return lowerGlobalTLSAddress(Op, DAG);
4779 case ISD::Constant:
4780 return lowerConstant(Op, DAG, Subtarget);
4781 case ISD::SELECT:
4782 return lowerSELECT(Op, DAG);
4783 case ISD::BRCOND:
4784 return lowerBRCOND(Op, DAG);
4785 case ISD::VASTART:
4786 return lowerVASTART(Op, DAG);
4787 case ISD::FRAMEADDR:
4788 return lowerFRAMEADDR(Op, DAG);
4789 case ISD::RETURNADDR:
4790 return lowerRETURNADDR(Op, DAG);
4791 case ISD::SHL_PARTS:
4792 return lowerShiftLeftParts(Op, DAG);
4793 case ISD::SRA_PARTS:
4794 return lowerShiftRightParts(Op, DAG, true);
4795 case ISD::SRL_PARTS:
4796 return lowerShiftRightParts(Op, DAG, false);
4797 case ISD::ROTL:
4798 case ISD::ROTR:
4799 assert(Subtarget.hasVendorXTHeadBb() &&
4800 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
4801 "Unexpected custom legalization");
4802 // XTHeadBb only supports rotate by constant.
4803 if (!isa<ConstantSDNode>(Op.getOperand(1)))
4804 return SDValue();
4805 return Op;
4806 case ISD::BITCAST: {
4807 SDLoc DL(Op);
4808 EVT VT = Op.getValueType();
4809 SDValue Op0 = Op.getOperand(0);
4810 EVT Op0VT = Op0.getValueType();
4811 MVT XLenVT = Subtarget.getXLenVT();
4812 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
4814 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
4815 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
4816 return FPConv;
4817 }
4818 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
4819 Subtarget.hasStdExtZfbfmin()) {
4820 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
4821 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
4822 return FPConv;
4823 }
4824 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
4825 Subtarget.hasStdExtFOrZfinx()) {
4826 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4827 SDValue FPConv =
4828 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
4829 return FPConv;
4830 }
4831 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
4832 Subtarget.hasStdExtZfa()) {
4833 SDValue Lo, Hi;
4834 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4835 SDValue RetReg =
4836 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
4837 return RetReg;
4838 }
4839
4840 // Consider other scalar<->scalar casts as legal if the types are legal.
4841 // Otherwise expand them.
4842 if (!VT.isVector() && !Op0VT.isVector()) {
4843 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
4844 return Op;
4845 return SDValue();
4846 }
4847
4848 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
4849 "Unexpected types");
4850
4851 if (VT.isFixedLengthVector()) {
4852 // We can handle fixed length vector bitcasts with a simple replacement
4853 // in isel.
4854 if (Op0VT.isFixedLengthVector())
4855 return Op;
4856 // When bitcasting from scalar to fixed-length vector, insert the scalar
4857 // into a one-element vector of the result type, and perform a vector
4858 // bitcast.
4859 if (!Op0VT.isVector()) {
4860 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
4861 if (!isTypeLegal(BVT))
4862 return SDValue();
4863 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
4864 DAG.getUNDEF(BVT), Op0,
4865 DAG.getConstant(0, DL, XLenVT)));
4866 }
4867 return SDValue();
4868 }
4869 // Custom-legalize bitcasts from fixed-length vector types to scalar types
4870 // thus: bitcast the vector to a one-element vector type whose element type
4871 // is the same as the result type, and extract the first element.
4872 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
4873 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
4874 if (!isTypeLegal(BVT))
4875 return SDValue();
4876 SDValue BVec = DAG.getBitcast(BVT, Op0);
4877 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
4878 DAG.getConstant(0, DL, XLenVT));
4879 }
4880 return SDValue();
4881 }
4882  case ISD::INTRINSIC_WO_CHAIN:
4883    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4884  case ISD::INTRINSIC_W_CHAIN:
4885    return LowerINTRINSIC_W_CHAIN(Op, DAG);
4886  case ISD::INTRINSIC_VOID:
4887    return LowerINTRINSIC_VOID(Op, DAG);
4888 case ISD::IS_FPCLASS:
4889 return LowerIS_FPCLASS(Op, DAG);
4890 case ISD::BITREVERSE: {
4891 MVT VT = Op.getSimpleValueType();
4892 SDLoc DL(Op);
4893 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
4894 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
4895 // Expand bitreverse to a bswap(rev8) followed by brev8.
4896 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
4897 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
4898 }
4899 case ISD::TRUNCATE:
4900 // Only custom-lower vector truncates
4901 if (!Op.getSimpleValueType().isVector())
4902 return Op;
4903 return lowerVectorTruncLike(Op, DAG);
4904 case ISD::ANY_EXTEND:
4905 case ISD::ZERO_EXTEND:
4906 if (Op.getOperand(0).getValueType().isVector() &&
4907 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4908 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
4909 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
4910 case ISD::SIGN_EXTEND:
4911 if (Op.getOperand(0).getValueType().isVector() &&
4912 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4913 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
4914 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
4915  case ISD::SPLAT_VECTOR_PARTS:
4916    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
4917  case ISD::INSERT_VECTOR_ELT:
4918    return lowerINSERT_VECTOR_ELT(Op, DAG);
4919  case ISD::EXTRACT_VECTOR_ELT:
4920    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
4921 case ISD::SCALAR_TO_VECTOR: {
4922 MVT VT = Op.getSimpleValueType();
4923 SDLoc DL(Op);
4924 SDValue Scalar = Op.getOperand(0);
4925 if (VT.getVectorElementType() == MVT::i1) {
4926 MVT WideVT = VT.changeVectorElementType(MVT::i8);
4927 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
4928 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
4929 }
4930 MVT ContainerVT = VT;
4931 if (VT.isFixedLengthVector())
4932 ContainerVT = getContainerForFixedLengthVector(VT);
4933 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
4934 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
4935 DAG.getUNDEF(ContainerVT), Scalar, VL);
4936 if (VT.isFixedLengthVector())
4937 V = convertFromScalableVector(VT, V, DAG, Subtarget);
4938 return V;
4939 }
4940 case ISD::VSCALE: {
4941 MVT VT = Op.getSimpleValueType();
4942 SDLoc DL(Op);
4943 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
4944 // We define our scalable vector types for lmul=1 to use a 64 bit known
4945 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
4946 // vscale as VLENB / 8.
4947 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
4948 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
4949 report_fatal_error("Support for VLEN==32 is incomplete.");
4950 // We assume VLENB is a multiple of 8. We manually choose the best shift
4951 // here because SimplifyDemandedBits isn't always able to simplify it.
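    // For example (illustrative): vscale * 4 has Log2 == 2 < 3, so the
    // power-of-two path below emits VLENB >> 1, i.e. VLEN / 16, which equals
    // 4 * (VLEN / 64) == 4 * vscale.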
4952 uint64_t Val = Op.getConstantOperandVal(0);
4953 if (isPowerOf2_64(Val)) {
4954 uint64_t Log2 = Log2_64(Val);
4955 if (Log2 < 3)
4956 return DAG.getNode(ISD::SRL, DL, VT, VLENB,
4957 DAG.getConstant(3 - Log2, DL, VT));
4958 if (Log2 > 3)
4959 return DAG.getNode(ISD::SHL, DL, VT, VLENB,
4960 DAG.getConstant(Log2 - 3, DL, VT));
4961 return VLENB;
4962 }
4963 // If the multiplier is a multiple of 8, scale it down to avoid needing
4964 // to shift the VLENB value.
4965 if ((Val % 8) == 0)
4966 return DAG.getNode(ISD::MUL, DL, VT, VLENB,
4967 DAG.getConstant(Val / 8, DL, VT));
4968
4969 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
4970 DAG.getConstant(3, DL, VT));
4971 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
4972 }
4973 case ISD::FPOWI: {
4974 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
4975 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
4976 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
4977 Op.getOperand(1).getValueType() == MVT::i32) {
4978 SDLoc DL(Op);
4979 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
4980 SDValue Powi =
4981 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
4982 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
4983 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
4984 }
4985 return SDValue();
4986 }
4987 case ISD::FMAXIMUM:
4988 case ISD::FMINIMUM:
4989 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
4990 case ISD::FP_EXTEND: {
4991 SDLoc DL(Op);
4992 EVT VT = Op.getValueType();
4993 SDValue Op0 = Op.getOperand(0);
4994 EVT Op0VT = Op0.getValueType();
4995 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
4996 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
4997 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
4998 SDValue FloatVal =
4999 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5000 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
5001 }
5002
5003 if (!Op.getValueType().isVector())
5004 return Op;
5005 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5006 }
5007 case ISD::FP_ROUND: {
5008 SDLoc DL(Op);
5009 EVT VT = Op.getValueType();
5010 SDValue Op0 = Op.getOperand(0);
5011 EVT Op0VT = Op0.getValueType();
5012 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
5013 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
5014 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
5015 Subtarget.hasStdExtDOrZdinx()) {
5016 SDValue FloatVal =
5017 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
5018 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5019 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
5020 }
5021
5022 if (!Op.getValueType().isVector())
5023 return Op;
5024 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5025 }
5026 case ISD::STRICT_FP_ROUND:
5027 case ISD::STRICT_FP_EXTEND:
5028 return lowerStrictFPExtendOrRoundLike(Op, DAG);
5029 case ISD::FP_TO_SINT:
5030 case ISD::FP_TO_UINT:
5031 case ISD::SINT_TO_FP:
5032 case ISD::UINT_TO_FP:
5033 case ISD::STRICT_FP_TO_SINT:
5034 case ISD::STRICT_FP_TO_UINT:
5035 case ISD::STRICT_SINT_TO_FP:
5036 case ISD::STRICT_UINT_TO_FP: {
5037 // RVV can only do fp<->int conversions to types half/double the size as
5038 // the source. We custom-lower any conversions that do two hops into
5039 // sequences.
5040 MVT VT = Op.getSimpleValueType();
5041 if (!VT.isVector())
5042 return Op;
5043 SDLoc DL(Op);
5044 bool IsStrict = Op->isStrictFPOpcode();
5045 SDValue Src = Op.getOperand(0 + IsStrict);
5046 MVT EltVT = VT.getVectorElementType();
5047 MVT SrcVT = Src.getSimpleValueType();
5048 MVT SrcEltVT = SrcVT.getVectorElementType();
5049 unsigned EltSize = EltVT.getSizeInBits();
5050 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
5051 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
5052 "Unexpected vector element types");
5053
5054 bool IsInt2FP = SrcEltVT.isInteger();
5055 // Widening conversions
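// For example, a v8i8 -> v8f32 sint_to_fp is lowered as a v8i8 -> v8i16
// sign_extend followed by a single-hop v8i16 -> v8f32 conversion.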
5056 if (EltSize > (2 * SrcEltSize)) {
5057 if (IsInt2FP) {
5058 // Do a regular integer sign/zero extension then convert to float.
5059 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
5060 VT.getVectorElementCount());
5061 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
5062 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
5063 ? ISD::ZERO_EXTEND
5064 : ISD::SIGN_EXTEND;
5065 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
5066 if (IsStrict)
5067 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
5068 Op.getOperand(0), Ext);
5069 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
5070 }
5071 // FP2Int
5072 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
5073 // Do one doubling fp_extend then complete the operation by converting
5074 // to int.
5075 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5076 if (IsStrict) {
5077 auto [FExt, Chain] =
5078 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
5079 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
5080 }
5081 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
5082 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
5083 }
5084
5085 // Narrowing conversions
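// For example, a v4i64 -> v4f16 sint_to_fp is lowered as v4i64 -> v4f32
// followed by an fp_round to v4f16, and a v4f64 -> v4i8 fp_to_sint is
// lowered as v4f64 -> v4i32 followed by a truncate.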
5086 if (SrcEltSize > (2 * EltSize)) {
5087 if (IsInt2FP) {
5088 // One narrowing int_to_fp, then an fp_round.
5089 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
5090 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5091 if (IsStrict) {
5092 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
5093 DAG.getVTList(InterimFVT, MVT::Other),
5094 Op.getOperand(0), Src);
5095 SDValue Chain = Int2FP.getValue(1);
5096 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
5097 }
5098 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
5099 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
5100 }
5101 // FP2Int
5102 // One narrowing fp_to_int, then truncate the integer. If the float isn't
5103 // representable by the integer, the result is poison.
5104 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
5105 VT.getVectorElementCount());
5106 if (IsStrict) {
5107 SDValue FP2Int =
5108 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
5109 Op.getOperand(0), Src);
5110 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5111 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
5112 }
5113 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
5114 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5115 }
5116
5117 // Scalable vectors can exit here; patterns will handle equally-sized
5118 // conversions as well as halving/doubling ones.
5119 if (!VT.isFixedLengthVector())
5120 return Op;
5121
5122 // For fixed-length vectors we lower to a custom "VL" node.
5123 unsigned RVVOpc = 0;
5124 switch (Op.getOpcode()) {
5125 default:
5126 llvm_unreachable("Impossible opcode");
5127 case ISD::FP_TO_SINT:
5128 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
5129 break;
5130 case ISD::FP_TO_UINT:
5131 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
5132 break;
5133 case ISD::SINT_TO_FP:
5134 RVVOpc = RISCVISD::SINT_TO_FP_VL;
5135 break;
5136 case ISD::UINT_TO_FP:
5137 RVVOpc = RISCVISD::UINT_TO_FP_VL;
5138 break;
5139 case ISD::STRICT_FP_TO_SINT:
5140 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
5141 break;
5142 case ISD::STRICT_FP_TO_UINT:
5143 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
5144 break;
5145 case ISD::STRICT_SINT_TO_FP:
5146 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
5147 break;
5148 case ISD::STRICT_UINT_TO_FP:
5149 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
5150 break;
5151 }
5152
5153 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5154 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
5155 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
5156 "Expected same element count");
5157
5158 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5159
5160 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
5161 if (IsStrict) {
5162 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
5163 Op.getOperand(0), Src, Mask, VL);
5164 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
5165 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
5166 }
5167 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
5168 return convertFromScalableVector(VT, Src, DAG, Subtarget);
5169 }
5170 case ISD::FP_TO_SINT_SAT:
5171 case ISD::FP_TO_UINT_SAT:
5172 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
5173 case ISD::FP_TO_BF16: {
5174 // Custom lower to ensure the libcall return is passed in an FPR on hard
5175 // float ABIs.
5176 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
5177 SDLoc DL(Op);
5178 MakeLibCallOptions CallOptions;
5179 RTLIB::Libcall LC =
5180 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
5181 SDValue Res =
5182 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
5183 if (Subtarget.is64Bit())
5184 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
5185 return DAG.getBitcast(MVT::i32, Res);
5186 }
5187 case ISD::BF16_TO_FP: {
5188 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
5189 MVT VT = Op.getSimpleValueType();
5190 SDLoc DL(Op);
5191 Op = DAG.getNode(
5192 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
5193 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
5194 SDValue Res = Subtarget.is64Bit()
5195 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
5196 : DAG.getBitcast(MVT::f32, Op);
5197 // fp_extend if the target VT is bigger than f32.
5198 if (VT != MVT::f32)
5199 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
5200 return Res;
5201 }
5202 case ISD::FP_TO_FP16: {
5203 // Custom lower to ensure the libcall return is passed in an FPR on hard
5204 // float ABIs.
5205 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
5206 SDLoc DL(Op);
5207 MakeLibCallOptions CallOptions;
5208 RTLIB::Libcall LC =
5209 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
5210 SDValue Res =
5211 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
5212 if (Subtarget.is64Bit())
5213 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
5214 return DAG.getBitcast(MVT::i32, Res);
5215 }
5216 case ISD::FP16_TO_FP: {
5217 // Custom lower to ensure the libcall argument is passed in an FPR on hard
5218 // float ABIs.
5219 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
5220 SDLoc DL(Op);
5221 MakeLibCallOptions CallOptions;
5222 SDValue Arg = Subtarget.is64Bit()
5223 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
5224 Op.getOperand(0))
5225 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
5226 SDValue Res =
5227 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
5228 .first;
5229 return Res;
5230 }
5231 case ISD::FTRUNC:
5232 case ISD::FCEIL:
5233 case ISD::FFLOOR:
5234 case ISD::FNEARBYINT:
5235 case ISD::FRINT:
5236 case ISD::FROUND:
5237 case ISD::FROUNDEVEN:
5238 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5239 case ISD::VECREDUCE_ADD:
5240 case ISD::VECREDUCE_UMAX:
5241 case ISD::VECREDUCE_SMAX:
5242 case ISD::VECREDUCE_UMIN:
5243 case ISD::VECREDUCE_SMIN:
5244 return lowerVECREDUCE(Op, DAG);
5245 case ISD::VECREDUCE_AND:
5246 case ISD::VECREDUCE_OR:
5247 case ISD::VECREDUCE_XOR:
5248 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5249 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
5250 return lowerVECREDUCE(Op, DAG);
5251 case ISD::VECREDUCE_FADD:
5252 case ISD::VECREDUCE_SEQ_FADD:
5253 case ISD::VECREDUCE_FMIN:
5254 case ISD::VECREDUCE_FMAX:
5255 return lowerFPVECREDUCE(Op, DAG);
5256 case ISD::VP_REDUCE_ADD:
5257 case ISD::VP_REDUCE_UMAX:
5258 case ISD::VP_REDUCE_SMAX:
5259 case ISD::VP_REDUCE_UMIN:
5260 case ISD::VP_REDUCE_SMIN:
5261 case ISD::VP_REDUCE_FADD:
5262 case ISD::VP_REDUCE_SEQ_FADD:
5263 case ISD::VP_REDUCE_FMIN:
5264 case ISD::VP_REDUCE_FMAX:
5265 return lowerVPREDUCE(Op, DAG);
5266 case ISD::VP_REDUCE_AND:
5267 case ISD::VP_REDUCE_OR:
5268 case ISD::VP_REDUCE_XOR:
5269 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
5270 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
5271 return lowerVPREDUCE(Op, DAG);
5272 case ISD::UNDEF: {
5273 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
5274 return convertFromScalableVector(Op.getSimpleValueType(),
5275 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
5276 }
5277 case ISD::INSERT_SUBVECTOR:
5278 return lowerINSERT_SUBVECTOR(Op, DAG);
5279 case ISD::EXTRACT_SUBVECTOR:
5280 return lowerEXTRACT_SUBVECTOR(Op, DAG);
5281 case ISD::VECTOR_DEINTERLEAVE:
5282 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
5283 case ISD::VECTOR_INTERLEAVE:
5284 return lowerVECTOR_INTERLEAVE(Op, DAG);
5285 case ISD::STEP_VECTOR:
5286 return lowerSTEP_VECTOR(Op, DAG);
5287 case ISD::VECTOR_REVERSE:
5288 return lowerVECTOR_REVERSE(Op, DAG);
5289 case ISD::VECTOR_SPLICE:
5290 return lowerVECTOR_SPLICE(Op, DAG);
5291 case ISD::BUILD_VECTOR:
5292 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
5293 case ISD::SPLAT_VECTOR:
5294 if (Op.getValueType().getVectorElementType() == MVT::i1)
5295 return lowerVectorMaskSplat(Op, DAG);
5296 return SDValue();
5297 case ISD::VECTOR_SHUFFLE:
5298 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
5299 case ISD::CONCAT_VECTORS: {
5300 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
5301 // better than going through the stack, as the default expansion does.
5302 SDLoc DL(Op);
5303 MVT VT = Op.getSimpleValueType();
5304 unsigned NumOpElts =
5305 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
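// Each operand covers NumOpElts lanes, so operand i is inserted at lane
// i * NumOpElts of the result.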
5306 SDValue Vec = DAG.getUNDEF(VT);
5307 for (const auto &OpIdx : enumerate(Op->ops())) {
5308 SDValue SubVec = OpIdx.value();
5309 // Don't insert undef subvectors.
5310 if (SubVec.isUndef())
5311 continue;
5312 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
5313 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
5314 }
5315 return Vec;
5316 }
5317 case ISD::LOAD:
5318 if (auto V = expandUnalignedRVVLoad(Op, DAG))
5319 return V;
5320 if (Op.getValueType().isFixedLengthVector())
5321 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
5322 return Op;
5323 case ISD::STORE:
5324 if (auto V = expandUnalignedRVVStore(Op, DAG))
5325 return V;
5326 if (Op.getOperand(1).getValueType().isFixedLengthVector())
5327 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
5328 return Op;
5329 case ISD::MLOAD:
5330 case ISD::VP_LOAD:
5331 return lowerMaskedLoad(Op, DAG);
5332 case ISD::MSTORE:
5333 case ISD::VP_STORE:
5334 return lowerMaskedStore(Op, DAG);
5335 case ISD::SELECT_CC: {
5336 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
5337 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
5338 // into separate SETCC+SELECT just like LegalizeDAG.
5339 SDValue Tmp1 = Op.getOperand(0);
5340 SDValue Tmp2 = Op.getOperand(1);
5341 SDValue True = Op.getOperand(2);
5342 SDValue False = Op.getOperand(3);
5343 EVT VT = Op.getValueType();
5344 SDValue CC = Op.getOperand(4);
5345 EVT CmpVT = Tmp1.getValueType();
5346 EVT CCVT =
5347 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
5348 SDLoc DL(Op);
5349 SDValue Cond =
5350 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
5351 return DAG.getSelect(DL, VT, Cond, True, False);
5352 }
5353 case ISD::SETCC: {
5354 MVT OpVT = Op.getOperand(0).getSimpleValueType();
5355 if (OpVT.isScalarInteger()) {
5356 MVT VT = Op.getSimpleValueType();
5357 SDValue LHS = Op.getOperand(0);
5358 SDValue RHS = Op.getOperand(1);
5359 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5360 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
5361 "Unexpected CondCode");
5362
5363 SDLoc DL(Op);
5364
5365 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
5366 // convert this to the equivalent of (set(u)ge X, C+1) by using
5367 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
5368 // in a register.
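// For example, (setgt X, 5) becomes (xori (slti X, 6), 1).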
5369 if (isa<ConstantSDNode>(RHS)) {
5370 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
5371 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
5372 // If this is an unsigned compare and the constant is -1, incrementing
5373 // the constant would change behavior. The result should be false.
5374 if (CCVal == ISD::SETUGT && Imm == -1)
5375 return DAG.getConstant(0, DL, VT);
5376 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
5377 CCVal = ISD::getSetCCSwappedOperands(CCVal);
5378 SDValue SetCC = DAG.getSetCC(
5379 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
5380 return DAG.getLogicalNOT(DL, SetCC, VT);
5381 }
5382 }
5383
5384 // Not a constant we could handle, swap the operands and condition code to
5385 // SETLT/SETULT.
5386 CCVal = ISD::getSetCCSwappedOperands(CCVal);
5387 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
5388 }
5389
5390 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
5391 }
5392 case ISD::ADD:
5393 case ISD::SUB:
5394 case ISD::MUL:
5395 case ISD::MULHS:
5396 case ISD::MULHU:
5397 case ISD::AND:
5398 case ISD::OR:
5399 case ISD::XOR:
5400 case ISD::SDIV:
5401 case ISD::SREM:
5402 case ISD::UDIV:
5403 case ISD::UREM:
5404 return lowerToScalableOp(Op, DAG);
5405 case ISD::SHL:
5406 case ISD::SRA:
5407 case ISD::SRL:
5408 if (Op.getSimpleValueType().isFixedLengthVector())
5409 return lowerToScalableOp(Op, DAG);
5410 // This can be called for an i32 shift amount that needs to be promoted.
5411 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
5412 "Unexpected custom legalisation");
5413 return SDValue();
5414 case ISD::SADDSAT:
5415 case ISD::UADDSAT:
5416 case ISD::SSUBSAT:
5417 case ISD::USUBSAT:
5418 case ISD::FADD:
5419 case ISD::FSUB:
5420 case ISD::FMUL:
5421 case ISD::FDIV:
5422 case ISD::FNEG:
5423 case ISD::FABS:
5424 case ISD::FSQRT:
5425 case ISD::FMA:
5426 case ISD::SMIN:
5427 case ISD::SMAX:
5428 case ISD::UMIN:
5429 case ISD::UMAX:
5430 case ISD::FMINNUM:
5431 case ISD::FMAXNUM:
5432 return lowerToScalableOp(Op, DAG);
5433 case ISD::ABS:
5434 case ISD::VP_ABS:
5435 return lowerABS(Op, DAG);
5436 case ISD::CTLZ:
5437 case ISD::CTLZ_ZERO_UNDEF:
5438 case ISD::CTTZ_ZERO_UNDEF:
5439 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5440 case ISD::VSELECT:
5441 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
5442 case ISD::FCOPYSIGN:
5443 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
5444 case ISD::STRICT_FADD:
5445 case ISD::STRICT_FSUB:
5446 case ISD::STRICT_FMUL:
5447 case ISD::STRICT_FDIV:
5448 case ISD::STRICT_FSQRT:
5449 case ISD::STRICT_FMA:
5450 return lowerToScalableOp(Op, DAG);
5451 case ISD::STRICT_FSETCC:
5452 case ISD::STRICT_FSETCCS:
5453 return lowerVectorStrictFSetcc(Op, DAG);
5454 case ISD::STRICT_FCEIL:
5455 case ISD::STRICT_FRINT:
5456 case ISD::STRICT_FFLOOR:
5457 case ISD::STRICT_FTRUNC:
5458 case ISD::STRICT_FNEARBYINT:
5459 case ISD::STRICT_FROUND:
5460 case ISD::STRICT_FROUNDEVEN:
5461 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5462 case ISD::MGATHER:
5463 case ISD::VP_GATHER:
5464 return lowerMaskedGather(Op, DAG);
5465 case ISD::MSCATTER:
5466 case ISD::VP_SCATTER:
5467 return lowerMaskedScatter(Op, DAG);
5468 case ISD::GET_ROUNDING:
5469 return lowerGET_ROUNDING(Op, DAG);
5470 case ISD::SET_ROUNDING:
5471 return lowerSET_ROUNDING(Op, DAG);
5472 case ISD::EH_DWARF_CFA:
5473 return lowerEH_DWARF_CFA(Op, DAG);
5474 case ISD::VP_SELECT:
5475 return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
5476 case ISD::VP_MERGE:
5477 return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
5478 case ISD::VP_ADD:
5479 return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
5480 case ISD::VP_SUB:
5481 return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
5482 case ISD::VP_MUL:
5483 return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
5484 case ISD::VP_SDIV:
5485 return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
5486 case ISD::VP_UDIV:
5487 return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
5488 case ISD::VP_SREM:
5489 return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
5490 case ISD::VP_UREM:
5491 return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
5492 case ISD::VP_AND:
5493 return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
5494 case ISD::VP_OR:
5495 return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
5496 case ISD::VP_XOR:
5497 return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
5498 case ISD::VP_ASHR:
5499 return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true);
5500 case ISD::VP_LSHR:
5501 return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true);
5502 case ISD::VP_SHL:
5503 return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true);
5504 case ISD::VP_FADD:
5505 return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
5506 case ISD::VP_FSUB:
5507 return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
5508 case ISD::VP_FMUL:
5509 return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
5510 case ISD::VP_FDIV:
5511 return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
5512 case ISD::VP_FNEG:
5513 return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
5514 case ISD::VP_FABS:
5515 return lowerVPOp(Op, DAG, RISCVISD::FABS_VL);
5516 case ISD::VP_SQRT:
5517 return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL);
5518 case ISD::VP_FMA:
5519 return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
5520 case ISD::VP_FMINNUM:
5521 return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true);
5522 case ISD::VP_FMAXNUM:
5523 return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true);
5524 case ISD::VP_FCOPYSIGN:
5525 return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true);
5526 case ISD::VP_SIGN_EXTEND:
5527 case ISD::VP_ZERO_EXTEND:
5528 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
5529 return lowerVPExtMaskOp(Op, DAG);
5530 return lowerVPOp(Op, DAG,
5531 Op.getOpcode() == ISD::VP_SIGN_EXTEND
5532 ? RISCVISD::VSEXT_VL
5533 : RISCVISD::VZEXT_VL);
5534 case ISD::VP_TRUNCATE:
5535 return lowerVectorTruncLike(Op, DAG);
5536 case ISD::VP_FP_EXTEND:
5537 case ISD::VP_FP_ROUND:
5538 return lowerVectorFPExtendOrRoundLike(Op, DAG);
5539 case ISD::VP_FP_TO_SINT:
5540 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL);
5541 case ISD::VP_FP_TO_UINT:
5542 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL);
5543 case ISD::VP_SINT_TO_FP:
5544 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
5545 case ISD::VP_UINT_TO_FP:
5546 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
5547 case ISD::VP_SETCC:
5548 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
5549 return lowerVPSetCCMaskOp(Op, DAG);
5550 return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
5551 case ISD::VP_SMIN:
5552 return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
5553 case ISD::VP_SMAX:
5554 return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
5555 case ISD::VP_UMIN:
5556 return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
5557 case ISD::VP_UMAX:
5558 return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
5559 case ISD::VP_BITREVERSE:
5560 return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
5561 case ISD::VP_BSWAP:
5562 return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
5563 case ISD::VP_CTLZ:
5564 case ISD::VP_CTLZ_ZERO_UNDEF:
5565 if (Subtarget.hasStdExtZvbb())
5566 return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
5567 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5568 case ISD::VP_CTTZ:
5569 case ISD::VP_CTTZ_ZERO_UNDEF:
5570 if (Subtarget.hasStdExtZvbb())
5571 return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
5572 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5573 case ISD::VP_CTPOP:
5574 return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
5575 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
5576 return lowerVPStridedLoad(Op, DAG);
5577 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
5578 return lowerVPStridedStore(Op, DAG);
5579 case ISD::VP_FCEIL:
5580 case ISD::VP_FFLOOR:
5581 case ISD::VP_FRINT:
5582 case ISD::VP_FNEARBYINT:
5583 case ISD::VP_FROUND:
5584 case ISD::VP_FROUNDEVEN:
5585 case ISD::VP_FROUNDTOZERO:
5586 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5587 }
5588}
5589
5590 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
5591 SelectionDAG &DAG, unsigned Flags) {
5592 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
5593}
5594
5595 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
5596 SelectionDAG &DAG, unsigned Flags) {
5597 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
5598 Flags);
5599}
5600
5601 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
5602 SelectionDAG &DAG, unsigned Flags) {
5603 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
5604 N->getOffset(), Flags);
5605}
5606
5607 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
5608 SelectionDAG &DAG, unsigned Flags) {
5609 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
5610}
5611
5612template <class NodeTy>
5613SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
5614 bool IsLocal, bool IsExternWeak) const {
5615 SDLoc DL(N);
5616 EVT Ty = getPointerTy(DAG.getDataLayout());
5617
5618 // When HWASAN is used and tagging of global variables is enabled
5619 // they should be accessed via the GOT, since the tagged address of a global
5620 // is incompatible with existing code models. This also applies to non-pic
5621 // mode.
5622 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
5623 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
5624 if (IsLocal && !Subtarget.allowTaggedGlobals())
5625 // Use PC-relative addressing to access the symbol. This generates the
5626 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
5627 // %pcrel_lo(auipc)).
5628 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
5629
5630 // Use PC-relative addressing to access the GOT for this symbol, then load
5631 // the address from the GOT. This generates the pattern (PseudoLGA sym),
5632 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
5633 MachineFunction &MF = DAG.getMachineFunction();
5634 MachineMemOperand *MemOp = MF.getMachineMemOperand(
5635 MachinePointerInfo::getGOT(MF),
5636 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5637 MachineMemOperand::MOInvariant,
5638 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5639 SDValue Load =
5640 DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other),
5641 {DAG.getEntryNode(), Addr}, Ty, MemOp);
5642 return Load;
5643 }
5644
5645 switch (getTargetMachine().getCodeModel()) {
5646 default:
5647 report_fatal_error("Unsupported code model for lowering");
5648 case CodeModel::Small: {
5649 // Generate a sequence for accessing addresses within the first 2 GiB of
5650 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
5651 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
5652 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
5653 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
5654 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
5655 }
5656 case CodeModel::Medium: {
5657 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
5658 if (IsExternWeak) {
5659 // An extern weak symbol may be undefined, i.e. have value 0, which may
5660 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
5661 // symbol. This generates the pattern (PseudoLGA sym), which expands to
5662 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
5663 MachineFunction &MF = DAG.getMachineFunction();
5664 MachineMemOperand *MemOp = MF.getMachineMemOperand(
5665 MachinePointerInfo::getGOT(MF),
5666 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5667 MachineMemOperand::MOInvariant,
5668 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5669 SDValue Load =
5670 DAG.getMemIntrinsicNode(RISCVISD::LGA, DL,
5671 DAG.getVTList(Ty, MVT::Other),
5672 {DAG.getEntryNode(), Addr}, Ty, MemOp);
5673 return Load;
5674 }
5675
5676 // Generate a sequence for accessing addresses within any 2GiB range within
5677 // the address space. This generates the pattern (PseudoLLA sym), which
5678 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
5679 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
5680 }
5681 }
5682}
5683
5684SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
5685 SelectionDAG &DAG) const {
5686 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
5687 assert(N->getOffset() == 0 && "unexpected offset in global node");
5688 const GlobalValue *GV = N->getGlobal();
5689 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
5690}
5691
5692SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
5693 SelectionDAG &DAG) const {
5694 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
5695
5696 return getAddr(N, DAG);
5697}
5698
5699SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
5700 SelectionDAG &DAG) const {
5701 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
5702
5703 return getAddr(N, DAG);
5704}
5705
5706SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
5707 SelectionDAG &DAG) const {
5708 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
5709
5710 return getAddr(N, DAG);
5711}
5712
5713SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
5714 SelectionDAG &DAG,
5715 bool UseGOT) const {
5716 SDLoc DL(N);
5717 EVT Ty = getPointerTy(DAG.getDataLayout());
5718 const GlobalValue *GV = N->getGlobal();
5719 MVT XLenVT = Subtarget.getXLenVT();
5720
5721 if (UseGOT) {
5722 // Use PC-relative addressing to access the GOT for this TLS symbol, then
5723 // load the address from the GOT and add the thread pointer. This generates
5724 // the pattern (PseudoLA_TLS_IE sym), which expands to
5725 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
5726 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
5727 MachineFunction &MF = DAG.getMachineFunction();
5728 MachineMemOperand *MemOp = MF.getMachineMemOperand(
5729 MachinePointerInfo::getGOT(MF),
5730 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5731 MachineMemOperand::MOInvariant,
5732 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5733 SDValue Load = DAG.getMemIntrinsicNode(
5734 RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
5735 {DAG.getEntryNode(), Addr}, Ty, MemOp);
5736
5737 // Add the thread pointer.
5738 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
5739 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
5740 }
5741
5742 // Generate a sequence for accessing the address relative to the thread
5743 // pointer, with the appropriate adjustment for the thread pointer offset.
5744 // This generates the pattern
5745 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
5746 SDValue AddrHi =
5747 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
5748 SDValue AddrAdd =
5749 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
5750 SDValue AddrLo =
5751 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
5752
5753 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
5754 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
5755 SDValue MNAdd =
5756 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
5757 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
5758}
5759
5760SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
5761 SelectionDAG &DAG) const {
5762 SDLoc DL(N);
5763 EVT Ty = getPointerTy(DAG.getDataLayout());
5764 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
5765 const GlobalValue *GV = N->getGlobal();
5766
5767 // Use a PC-relative addressing mode to access the global dynamic GOT address.
5768 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
5769 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
5770 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
5771 SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
5772
5773 // Prepare argument list to generate call.
5774 ArgListTy Args;
5775 ArgListEntry Entry;
5776 Entry.Node = Load;
5777 Entry.Ty = CallTy;
5778 Args.push_back(Entry);
5779
5780 // Setup call to __tls_get_addr.
5781 TargetLowering::CallLoweringInfo CLI(DAG);
5782 CLI.setDebugLoc(DL)
5783 .setChain(DAG.getEntryNode())
5784 .setLibCallee(CallingConv::C, CallTy,
5785 DAG.getExternalSymbol("__tls_get_addr", Ty),
5786 std::move(Args));
5787
5788 return LowerCallTo(CLI).first;
5789}
5790
5791SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
5792 SelectionDAG &DAG) const {
5793 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
5794 assert(N->getOffset() == 0 && "unexpected offset in global node");
5795
5796 if (DAG.getTarget().useEmulatedTLS())
5797 return LowerToTLSEmulatedModel(N, DAG);
5798
5799 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
5800
5801 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
5802 CallingConv::GHC)
5803 report_fatal_error("In GHC calling convention TLS is not supported");
5804
5805 SDValue Addr;
5806 switch (Model) {
5807 case TLSModel::LocalExec:
5808 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
5809 break;
5810 case TLSModel::InitialExec:
5811 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
5812 break;
5813 case TLSModel::LocalDynamic:
5814 case TLSModel::GeneralDynamic:
5815 Addr = getDynamicTLSAddr(N, DAG);
5816 break;
5817 }
5818
5819 return Addr;
5820}
5821
5822// Return true if Val is equal to (setcc LHS, RHS, CC).
5823// Return false if Val is the inverse of (setcc LHS, RHS, CC).
5824// Otherwise, return std::nullopt.
5825static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
5826 ISD::CondCode CC, SDValue Val) {
5827 assert(Val->getOpcode() == ISD::SETCC);
5828 SDValue LHS2 = Val.getOperand(0);
5829 SDValue RHS2 = Val.getOperand(1);
5830 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
5831
5832 if (LHS == LHS2 && RHS == RHS2) {
5833 if (CC == CC2)
5834 return true;
5835 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
5836 return false;
5837 } else if (LHS == RHS2 && RHS == LHS2) {
5838 CC2 = ISD::getSetCCSwappedOperands(CC2);
5839 if (CC == CC2)
5840 return true;
5841 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
5842 return false;
5843 }
5844
5845 return std::nullopt;
5846}
5847
5848 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
5849 const RISCVSubtarget &Subtarget) {
5850 SDValue CondV = N->getOperand(0);
5851 SDValue TrueV = N->getOperand(1);
5852 SDValue FalseV = N->getOperand(2);
5853 MVT VT = N->getSimpleValueType(0);
5854 SDLoc DL(N);
5855
5856 if (!Subtarget.hasShortForwardBranchOpt()) {
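// Scalar select conditions are zero-or-one booleans here, so -c is either
// 0 or all-ones and (c-1) is either all-ones or 0, which makes the folds
// below valid.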
5857 // (select c, -1, y) -> -c | y
5858 if (isAllOnesConstant(TrueV)) {
5859 SDValue Neg = DAG.getNegative(CondV, DL, VT);
5860 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
5861 }
5862 // (select c, y, -1) -> (c-1) | y
5863 if (isAllOnesConstant(FalseV)) {
5864 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
5865 DAG.getAllOnesConstant(DL, VT));
5866 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
5867 }
5868
5869 // (select c, 0, y) -> (c-1) & y
5870 if (isNullConstant(TrueV)) {
5871 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
5872 DAG.getAllOnesConstant(DL, VT));
5873 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
5874 }
5875 // (select c, y, 0) -> -c & y
5876 if (isNullConstant(FalseV)) {
5877 SDValue Neg = DAG.getNegative(CondV, DL, VT);
5878 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
5879 }
5880 }
5881
5882 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
5883 // when both truev and falsev are also setcc.
5884 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
5885 FalseV.getOpcode() == ISD::SETCC) {
5886 SDValue LHS = CondV.getOperand(0);
5887 SDValue RHS = CondV.getOperand(1);
5888 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
5889
5890 // (select x, x, y) -> x | y
5891 // (select !x, x, y) -> x & y
5892 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
5893 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
5894 FalseV);
5895 }
5896 // (select x, y, x) -> x & y
5897 // (select !x, y, x) -> x | y
5898 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
5899 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
5900 FalseV);
5901 }
5902 }
5903
5904 return SDValue();
5905}
5906
5907/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
5908/// check for equality with 0. This function emits nodes that convert the
5909/// seteq/setne into something that can be compared with 0.
5910/// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce
5911/// target-independent SelectionDAG nodes rather than machine nodes.
5912 static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
5913 SelectionDAG &DAG) {
5914 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
5915 "Unexpected condition code!");
5916
5917 // We're looking for a setcc.
5918 if (N->getOpcode() != ISD::SETCC)
5919 return SDValue();
5920
5921 // Must be an equality comparison.
5922 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
5923 if (CCVal != ExpectedCCVal)
5924 return SDValue();
5925
5926 SDValue LHS = N->getOperand(0);
5927 SDValue RHS = N->getOperand(1);
5928
5929 if (!LHS.getValueType().isScalarInteger())
5930 return SDValue();
5931
5932 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
5933 if (isNullConstant(RHS))
5934 return LHS;
5935
5936 SDLoc DL(N);
5937
5938 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
5939 int64_t CVal = C->getSExtValue();
5940 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
5941 // non-zero otherwise.
5942 if (CVal == -2048)
5943 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS,
5944 DAG.getConstant(CVal, DL, N->getValueType(0)));
5945 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
5946 // LHS is equal to the RHS and non-zero otherwise.
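// For example, (seteq X, 17) is rewritten as (add X, -17), which is zero
// iff X == 17.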
5947 if (isInt<12>(CVal) || CVal == 2048)
5948 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS,
5949 DAG.getConstant(-CVal, DL, N->getValueType(0)));
5950 }
5951
5952 // If nothing else we can XOR the LHS and RHS to produce zero if they are
5953 // equal and a non-zero value if they aren't.
5954 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS);
5955}
5956
5957// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
5958// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
5959// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
5960// being `0` or `-1`. In such cases we can replace `select` with `and`.
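// For example, (and (select cond, X, 0), 0xff) becomes
// (select cond, (and X, 0xff), 0).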
5961// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
5962// than `c0`?
5963static SDValue
5964 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
5965 const RISCVSubtarget &Subtarget) {
5966 if (Subtarget.hasShortForwardBranchOpt())
5967 return SDValue();
5968
5969 unsigned SelOpNo = 0;
5970 SDValue Sel = BO->getOperand(0);
5971 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
5972 SelOpNo = 1;
5973 Sel = BO->getOperand(1);
5974 }
5975
5976 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
5977 return SDValue();
5978
5979 unsigned ConstSelOpNo = 1;
5980 unsigned OtherSelOpNo = 2;
5981 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
5982 ConstSelOpNo = 2;
5983 OtherSelOpNo = 1;
5984 }
5985 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
5986 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
5987 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
5988 return SDValue();
5989
5990 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
5991 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
5992 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
5993 return SDValue();
5994
5995 SDLoc DL(Sel);
5996 EVT VT = BO->getValueType(0);
5997
5998 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
5999 if (SelOpNo == 1)
6000 std::swap(NewConstOps[0], NewConstOps[1]);
6001
6002 SDValue NewConstOp =
6003 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
6004 if (!NewConstOp)
6005 return SDValue();
6006
6007 const APInt &NewConstAPInt =
6008 cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
6009 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
6010 return SDValue();
6011
6012 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
6013 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
6014 if (SelOpNo == 1)
6015 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
6016 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
6017
6018 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
6019 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
6020 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
6021}
6022
6023SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
6024 SDValue CondV = Op.getOperand(0);
6025 SDValue TrueV = Op.getOperand(1);
6026 SDValue FalseV = Op.getOperand(2);
6027 SDLoc DL(Op);
6028 MVT VT = Op.getSimpleValueType();
6029 MVT XLenVT = Subtarget.getXLenVT();
6030
6031 // Lower vector SELECTs to VSELECTs by splatting the condition.
6032 if (VT.isVector()) {
6033 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
6034 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
6035 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
6036 }
6037
6038 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
6039 // nodes to implement the SELECT. Performing the lowering here allows for
6040 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
6041 // sequence or RISCVISD::SELECT_CC node (branch-based select).
6042 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
6043 VT.isScalarInteger()) {
6044 if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) {
6045 // (select (riscv_setne c), t, 0) -> (czero_eqz t, c)
6046 if (isNullConstant(FalseV))
6047 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV);
6048 // (select (riscv_setne c), 0, f) -> (czero_nez f, c)
6049 if (isNullConstant(TrueV))
6050 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV);
6051 // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f,
6052 // c))
6053 return DAG.getNode(
6054 ISD::OR, DL, VT,
6055 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV),
6056 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV));
6057 }
6058 if (SDValue NewCondV = selectSETCC(CondV, ISD::SETEQ, DAG)) {
6059 // (select (riscv_seteq c), t, 0) -> (czero_nez t, c)
6060 if (isNullConstant(FalseV))
6061 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV);
6062 // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c)
6063 if (isNullConstant(TrueV))
6064 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV);
6065 // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t,
6066 // c))
6067 return DAG.getNode(
6068 ISD::OR, DL, VT,
6069 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV),
6070 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV));
6071 }
6072
6073 // (select c, t, 0) -> (czero_eqz t, c)
6074 if (isNullConstant(FalseV))
6075 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
6076 // (select c, 0, f) -> (czero_nez f, c)
6077 if (isNullConstant(TrueV))
6078 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
6079
6080 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
6081 if (TrueV.getOpcode() == ISD::AND &&
6082 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
6083 return DAG.getNode(
6084 ISD::OR, DL, VT, TrueV,
6085 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6086 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
6087 if (FalseV.getOpcode() == ISD::AND &&
6088 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
6089 return DAG.getNode(
6090 ISD::OR, DL, VT, FalseV,
6091 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
6092
6093 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
6094 return DAG.getNode(ISD::OR, DL, VT,
6095 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
6096 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6097 }
6098
6099 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
6100 return V;
6101
6102 if (Op.hasOneUse()) {
6103 unsigned UseOpc = Op->use_begin()->getOpcode();
6104 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
6105 SDNode *BinOp = *Op->use_begin();
6106 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
6107 DAG, Subtarget)) {
6108 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
6109 return lowerSELECT(NewSel, DAG);
6110 }
6111 }
6112 }
6113
6114 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
6115 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
6116 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
6117 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
6118 if (FPTV && FPFV) {
6119 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
6120 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
6121 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
6122 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
6123 DAG.getConstant(1, DL, XLenVT));
6124 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
6125 }
6126 }
6127
6128 // If the condition is not an integer SETCC which operates on XLenVT, we need
6129 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
6130 // (select condv, truev, falsev)
6131 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
6132 if (CondV.getOpcode() != ISD::SETCC ||
6133 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
6134 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6135 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
6136
6137 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
6138
6139 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
6140 }
6141
6142 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
6143 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
6144 // advantage of the integer compare+branch instructions. i.e.:
6145 // (select (setcc lhs, rhs, cc), truev, falsev)
6146 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
6147 SDValue LHS = CondV.getOperand(0);
6148 SDValue RHS = CondV.getOperand(1);
6149 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6150
6151 // Special case for a select of 2 constants that have a difference of 1.
6152 // Normally this is done by DAGCombine, but if the select is introduced by
6153 // type legalization or op legalization, we miss it. Restricting to SETLT
6154 // case for now because that is what signed saturating add/sub need.
6155 // FIXME: We don't need the condition to be SETLT or even a SETCC,
6156 // but we would probably want to swap the true/false values if the condition
6157 // is SETGE/SETLE to avoid an XORI.
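// For example, (select (setlt a, b), 4, 3) becomes (add (setlt a, b), 3).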
6158 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6159 CCVal == ISD::SETLT) {
6160 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
6161 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
6162 if (TrueVal - 1 == FalseVal)
6163 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
6164 if (TrueVal + 1 == FalseVal)
6165 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
6166 }
6167
6168 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6169 // 1 < x ? x : 1 -> 0 < x ? x : 1
6170 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
6171 RHS == TrueV && LHS == FalseV) {
6172 LHS = DAG.getConstant(0, DL, VT);
6173 // 0 <u x is the same as x != 0.
6174 if (CCVal == ISD::SETULT) {
6175 std::swap(LHS, RHS);
6176 CCVal = ISD::SETNE;
6177 }
6178 }
6179
6180 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
6181 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
6182 RHS == FalseV) {
6183 RHS = DAG.getConstant(0, DL, VT);
6184 }
6185
6186 SDValue TargetCC = DAG.getCondCode(CCVal);
6187
6188 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
6189 // (select (setcc lhs, rhs, CC), constant, falsev)
6190 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
6191 std::swap(TrueV, FalseV);
6192 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
6193 }
6194
6195 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
6196 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
6197}
6198
6199SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
6200 SDValue CondV = Op.getOperand(1);
6201 SDLoc DL(Op);
6202 MVT XLenVT = Subtarget.getXLenVT();
6203
6204 if (CondV.getOpcode() == ISD::SETCC &&
6205 CondV.getOperand(0).getValueType() == XLenVT) {
6206 SDValue LHS = CondV.getOperand(0);
6207 SDValue RHS = CondV.getOperand(1);
6208 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6209
6210 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6211
6212 SDValue TargetCC = DAG.getCondCode(CCVal);
6213 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
6214 LHS, RHS, TargetCC, Op.getOperand(2));
6215 }
6216
6217 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
6218 CondV, DAG.getConstant(0, DL, XLenVT),
6219 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
6220}
6221
6222SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
6223 MachineFunction &MF = DAG.getMachineFunction();
6224 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
6225
6226 SDLoc DL(Op);
6227 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
6228 getPointerTy(MF.getDataLayout()));
6229
6230 // vastart just stores the address of the VarArgsFrameIndex slot into the
6231 // memory location argument.
6232 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
6233 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
6234 MachinePointerInfo(SV));
6235}
6236
6237SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
6238 SelectionDAG &DAG) const {
6239 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
6240 MachineFunction &MF = DAG.getMachineFunction();
6241 MachineFrameInfo &MFI = MF.getFrameInfo();
6242 MFI.setFrameAddressIsTaken(true);
6243 Register FrameReg = RI.getFrameRegister(MF);
6244 int XLenInBytes = Subtarget.getXLen() / 8;
6245
6246 EVT VT = Op.getValueType();
6247 SDLoc DL(Op);
6248 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
6249 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6250 while (Depth--) {
6251 int Offset = -(XLenInBytes * 2);
6252 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
6253 DAG.getIntPtrConstant(Offset, DL));
6254 FrameAddr =
6255 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
6256 }
6257 return FrameAddr;
6258}
6259
6260SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
6261 SelectionDAG &DAG) const {
6262 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
6263 MachineFunction &MF = DAG.getMachineFunction();
6264 MachineFrameInfo &MFI = MF.getFrameInfo();
6265 MFI.setReturnAddressIsTaken(true);
6266 MVT XLenVT = Subtarget.getXLenVT();
6267 int XLenInBytes = Subtarget.getXLen() / 8;
6268
6269 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6270 return SDValue();
6271
6272 EVT VT = Op.getValueType();
6273 SDLoc DL(Op);
6274 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6275 if (Depth) {
6276 int Off = -XLenInBytes;
6277 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
6278 SDValue Offset = DAG.getConstant(Off, DL, VT);
6279 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
6280 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
6281 MachinePointerInfo());
6282 }
6283
6284 // Return the value of the return address register, marking it an implicit
6285 // live-in.
6286 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
6287 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
6288}
6289
6290SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
6291 SelectionDAG &DAG) const {
6292 SDLoc DL(Op);
6293 SDValue Lo = Op.getOperand(0);
6294 SDValue Hi = Op.getOperand(1);
6295 SDValue Shamt = Op.getOperand(2);
6296 EVT VT = Lo.getValueType();
6297
6298 // if Shamt-XLEN < 0: // Shamt < XLEN
6299 // Lo = Lo << Shamt
6300 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
6301 // else:
6302 // Lo = 0
6303 // Hi = Lo << (Shamt-XLEN)
6304
6305 SDValue Zero = DAG.getConstant(0, DL, VT);
6306 SDValue One = DAG.getConstant(1, DL, VT);
6307 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
6308 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
6309 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
6310 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
6311
6312 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
6313 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
6314 SDValue ShiftRightLo =
6315 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
6316 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
6317 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
6318 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
6319
6320 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
6321
6322 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
6323 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
6324
6325 SDValue Parts[2] = {Lo, Hi};
6326 return DAG.getMergeValues(Parts, DL);
6327}
6328
6329SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
6330 bool IsSRA) const {
6331 SDLoc DL(Op);
6332 SDValue Lo = Op.getOperand(0);
6333 SDValue Hi = Op.getOperand(1);
6334 SDValue Shamt = Op.getOperand(2);
6335 EVT VT = Lo.getValueType();
6336
6337 // SRA expansion:
6338 // if Shamt-XLEN < 0: // Shamt < XLEN
6339 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
6340 // Hi = Hi >>s Shamt
6341 // else:
6342 // Lo = Hi >>s (Shamt-XLEN);
6343 // Hi = Hi >>s (XLEN-1)
6344 //
6345 // SRL expansion:
6346 // if Shamt-XLEN < 0: // Shamt < XLEN
6347 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
6348 // Hi = Hi >>u Shamt
6349 // else:
6350 // Lo = Hi >>u (Shamt-XLEN);
6351 // Hi = 0;
6352
6353 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
6354
6355 SDValue Zero = DAG.getConstant(0, DL, VT);
6356 SDValue One = DAG.getConstant(1, DL, VT);
6357 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
6358 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
6359 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
6360 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
6361
6362 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
6363 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
6364 SDValue ShiftLeftHi =
6365 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
6366 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
6367 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
6368 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
6369 SDValue HiFalse =
6370 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
6371
6372 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
6373
6374 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
6375 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
6376
6377 SDValue Parts[2] = {Lo, Hi};
6378 return DAG.getMergeValues(Parts, DL);
6379}
6380
6381// Lower splats of i1 types to SETCC. For each mask vector type, we have a
6382// legal equivalently-sized i8 type, so we can use that as a go-between.
6383SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
6384 SelectionDAG &DAG) const {
6385 SDLoc DL(Op);
6386 MVT VT = Op.getSimpleValueType();
6387 SDValue SplatVal = Op.getOperand(0);
6388 // All-zeros or all-ones splats are handled specially.
6389 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
6390 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
6391 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
6392 }
6393 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
6394 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
6395 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
6396 }
6397 MVT XLenVT = Subtarget.getXLenVT();
6398 assert(SplatVal.getValueType() == XLenVT &&
6399 "Unexpected type for i1 splat value");
6400 MVT InterVT = VT.changeVectorElementType(MVT::i8);
6401 SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
6402 DAG.getConstant(1, DL, XLenVT));
6403 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
6404 SDValue Zero = DAG.getConstant(0, DL, InterVT);
6405 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
6406}
6407
6408// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
6409// illegal (currently only vXi64 RV32).
6410// FIXME: We could also catch non-constant sign-extended i32 values and lower
6411// them to VMV_V_X_VL.
6412SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
6413 SelectionDAG &DAG) const {
6414 SDLoc DL(Op);
6415 MVT VecVT = Op.getSimpleValueType();
6416 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
6417 "Unexpected SPLAT_VECTOR_PARTS lowering");
6418
6419 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
6420 SDValue Lo = Op.getOperand(0);
6421 SDValue Hi = Op.getOperand(1);
6422
6423 if (VecVT.isFixedLengthVector()) {
6424 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
6425 SDLoc DL(Op);
6426 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
6427
6428 SDValue Res =
6429 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
6430 return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
6431 }
6432
6433 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
6434 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
6435 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
6436 // If Hi constant is all the same sign bit as Lo, lower this as a custom
6437 // node in order to try and match RVV vector/scalar instructions.
6438 if ((LoC >> 31) == HiC)
6439 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
6440 Lo, DAG.getRegister(RISCV::X0, MVT::i32));
6441 }
6442
6443 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
6444 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
6445 isa<ConstantSDNode>(Hi.getOperand(1)) &&
6446 Hi.getConstantOperandVal(1) == 31)
6447 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
6448 DAG.getRegister(RISCV::X0, MVT::i32));
6449
6450 // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
6451 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
6452 DAG.getUNDEF(VecVT), Lo, Hi,
6453 DAG.getRegister(RISCV::X0, MVT::i32));
6454}
6455
6456// Custom-lower extensions from mask vectors by using a vselect either with 1
6457// for zero/any-extension or -1 for sign-extension:
6458// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
6459// Note that any-extension is lowered identically to zero-extension.
6460SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
6461 int64_t ExtTrueVal) const {
6462 SDLoc DL(Op);
6463 MVT VecVT = Op.getSimpleValueType();
6464 SDValue Src = Op.getOperand(0);
6465 // Only custom-lower extensions from mask types
6466 assert(Src.getValueType().isVector() &&
6467 Src.getValueType().getVectorElementType() == MVT::i1);
6468
6469 if (VecVT.isScalableVector()) {
6470 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
6471 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
6472 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
6473 }
6474
6475 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
6476 MVT I1ContainerVT =
6477 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6478
6479 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
6480
6481 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
6482
6483 MVT XLenVT = Subtarget.getXLenVT();
6484 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6485 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
6486
6487 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6488 DAG.getUNDEF(ContainerVT), SplatZero, VL);
6489 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6490 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
6491 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
6492 SplatTrueVal, SplatZero, VL);
6493
6494 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
6495}
6496
6497SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
6498 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
6499 MVT ExtVT = Op.getSimpleValueType();
6500 // Only custom-lower extensions from fixed-length vector types.
6501 if (!ExtVT.isFixedLengthVector())
6502 return Op;
6503 MVT VT = Op.getOperand(0).getSimpleValueType();
6504 // Grab the canonical container type for the extended type. Infer the smaller
6505 // type from that to ensure the same number of vector elements, as we know
6506 // the LMUL will be sufficient to hold the smaller type.
6507 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
6508 // Get the extended container type manually to ensure the same number of
6509 // vector elements between source and dest.
6510 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
6511 ContainerExtVT.getVectorElementCount());
6512
6513 SDValue Op1 =
6514 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
6515
6516 SDLoc DL(Op);
6517 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6518
6519 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
6520
6521 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
6522}
6523
6524// Custom-lower truncations from vectors to mask vectors by using a mask and a
6525// setcc operation:
6526// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
6527SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
6528 SelectionDAG &DAG) const {
6529 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
6530 SDLoc DL(Op);
6531 EVT MaskVT = Op.getValueType();
6532 // Only expect to custom-lower truncations to mask types
6533 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
6534 "Unexpected type for vector mask lowering");
6535 SDValue Src = Op.getOperand(0);
6536 MVT VecVT = Src.getSimpleValueType();
6537 SDValue Mask, VL;
6538 if (IsVPTrunc) {
6539 Mask = Op.getOperand(1);
6540 VL = Op.getOperand(2);
6541 }
6542 // If this is a fixed vector, we need to convert it to a scalable vector.
6543 MVT ContainerVT = VecVT;
6544
6545 if (VecVT.isFixedLengthVector()) {
6546 ContainerVT = getContainerForFixedLengthVector(VecVT);
6547 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6548 if (IsVPTrunc) {
6549 MVT MaskContainerVT =
6550 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6551 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6552 }
6553 }
6554
6555 if (!IsVPTrunc) {
6556 std::tie(Mask, VL) =
6557 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
6558 }
6559
6560 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
6561 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
6562
6563 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6564 DAG.getUNDEF(ContainerVT), SplatOne, VL);
6565 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6566 DAG.getUNDEF(ContainerVT), SplatZero, VL);
6567
6568 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
6569 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
6570 DAG.getUNDEF(ContainerVT), Mask, VL);
6571 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
6572 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
6573 DAG.getUNDEF(MaskContainerVT), Mask, VL});
6574 if (MaskVT.isFixedLengthVector())
6575 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
6576 return Trunc;
6577}
6578
6579SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
6580 SelectionDAG &DAG) const {
6581 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
6582 SDLoc DL(Op);
6583
6584 MVT VT = Op.getSimpleValueType();
6585 // Only custom-lower vector truncates
6586 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
6587
6588 // Truncates to mask types are handled differently
6589 if (VT.getVectorElementType() == MVT::i1)
6590 return lowerVectorMaskTruncLike(Op, DAG);
6591
6592 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
6593 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
6594 // truncate by one power of two at a time.
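  // For example, an i64 -> i8 truncate is emitted (roughly) as three chained
  // RISCVISD::TRUNCATE_VECTOR_VL nodes, i64 -> i32 -> i16 -> i8, all sharing
  // the same mask and VL.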
6595 MVT DstEltVT = VT.getVectorElementType();
6596
6597 SDValue Src = Op.getOperand(0);
6598 MVT SrcVT = Src.getSimpleValueType();
6599 MVT SrcEltVT = SrcVT.getVectorElementType();
6600
6601 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
6602 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
6603 "Unexpected vector truncate lowering");
6604
6605 MVT ContainerVT = SrcVT;
6606 SDValue Mask, VL;
6607 if (IsVPTrunc) {
6608 Mask = Op.getOperand(1);
6609 VL = Op.getOperand(2);
6610 }
6611 if (SrcVT.isFixedLengthVector()) {
6612 ContainerVT = getContainerForFixedLengthVector(SrcVT);
6613 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6614 if (IsVPTrunc) {
6615 MVT MaskVT = getMaskTypeFor(ContainerVT);
6616 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6617 }
6618 }
6619
6620 SDValue Result = Src;
6621 if (!IsVPTrunc) {
6622 std::tie(Mask, VL) =
6623 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
6624 }
6625
6626 LLVMContext &Context = *DAG.getContext();
6627 const ElementCount Count = ContainerVT.getVectorElementCount();
6628 do {
6629 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
6630 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
6631 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
6632 Mask, VL);
6633 } while (SrcEltVT != DstEltVT);
6634
6635 if (SrcVT.isFixedLengthVector())
6636 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
6637
6638 return Result;
6639}
6640
6641SDValue
6642RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
6643 SelectionDAG &DAG) const {
6644 SDLoc DL(Op);
6645 SDValue Chain = Op.getOperand(0);
6646 SDValue Src = Op.getOperand(1);
6647 MVT VT = Op.getSimpleValueType();
6648 MVT SrcVT = Src.getSimpleValueType();
6649 MVT ContainerVT = VT;
6650 if (VT.isFixedLengthVector()) {
6651 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6652 ContainerVT =
6653 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
6654 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6655 }
6656
6657 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
6658
6659   // RVV can only widen/truncate fp to types double/half the size of the source.
6660 if ((VT.getVectorElementType() == MVT::f64 &&
6661 SrcVT.getVectorElementType() == MVT::f16) ||
6662 (VT.getVectorElementType() == MVT::f16 &&
6663 SrcVT.getVectorElementType() == MVT::f64)) {
6664 // For double rounding, the intermediate rounding should be round-to-odd.
6665     unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
6666                                 ? RISCVISD::STRICT_FP_EXTEND_VL
6667                                 : RISCVISD::STRICT_VFNCVT_ROD_VL;
6668 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
6669 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
6670 Chain, Src, Mask, VL);
6671 Chain = Src.getValue(1);
6672 }
6673
6674   unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
6675                          ? RISCVISD::STRICT_FP_EXTEND_VL
6676                          : RISCVISD::STRICT_FP_ROUND_VL;
6677 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6678 Chain, Src, Mask, VL);
6679 if (VT.isFixedLengthVector()) {
6680 // StrictFP operations have two result values. Their lowered result should
6681     // have the same result count.
6682 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
6683 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
6684 }
6685 return Res;
6686}
6687
6688SDValue
6689RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
6690 SelectionDAG &DAG) const {
6691 bool IsVP =
6692 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
6693 bool IsExtend =
6694 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
6695   // RVV can only truncate fp to types half the size of the source. We
6696 // custom-lower f64->f16 rounds via RVV's round-to-odd float
6697 // conversion instruction.
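  // For example, an f64 -> f16 round is lowered (roughly) as a vfncvt.rod.f.f.w
  // (f64 -> f32, round-to-odd) followed by a vfncvt.f.f.w (f32 -> f16); the
  // corresponding f16 -> f64 extend goes through f32 via two widening
  // conversions.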
6698 SDLoc DL(Op);
6699 MVT VT = Op.getSimpleValueType();
6700
6701 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
6702
6703 SDValue Src = Op.getOperand(0);
6704 MVT SrcVT = Src.getSimpleValueType();
6705
6706 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
6707 SrcVT.getVectorElementType() != MVT::f16);
6708 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
6709 SrcVT.getVectorElementType() != MVT::f64);
6710
6711 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
6712
6713 // Prepare any fixed-length vector operands.
6714 MVT ContainerVT = VT;
6715 SDValue Mask, VL;
6716 if (IsVP) {
6717 Mask = Op.getOperand(1);
6718 VL = Op.getOperand(2);
6719 }
6720 if (VT.isFixedLengthVector()) {
6721 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6722 ContainerVT =
6723 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
6724 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6725 if (IsVP) {
6726 MVT MaskVT = getMaskTypeFor(ContainerVT);
6727 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6728 }
6729 }
6730
6731 if (!IsVP)
6732 std::tie(Mask, VL) =
6733 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
6734
6735 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
6736
6737 if (IsDirectConv) {
6738 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
6739 if (VT.isFixedLengthVector())
6740 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
6741 return Src;
6742 }
6743
6744 unsigned InterConvOpc =
6745       IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
6746
6747 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
6748 SDValue IntermediateConv =
6749 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
6750 SDValue Result =
6751 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
6752 if (VT.isFixedLengthVector())
6753 return convertFromScalableVector(VT, Result, DAG, Subtarget);
6754 return Result;
6755}
6756
6757// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
6758// first position of a vector, and that vector is slid up to the insert index.
6759// By limiting the active vector length to index+1 and merging with the
6760// original vector (with an undisturbed tail policy for elements >= VL), we
6761// achieve the desired result of leaving all elements untouched except the one
6762// at VL-1, which is replaced with the desired value.
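// For an integer element, inserting a scalar at index 2 of a v4i32 is emitted
// (roughly) as a vmv.s.x of the scalar into element 0 of a temporary vector,
// followed by a vslideup with offset 2 and VL = 3 under a tail-undisturbed
// policy, so elements 3 and beyond of the original vector are preserved.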
6763SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6764 SelectionDAG &DAG) const {
6765 SDLoc DL(Op);
6766 MVT VecVT = Op.getSimpleValueType();
6767 SDValue Vec = Op.getOperand(0);
6768 SDValue Val = Op.getOperand(1);
6769 SDValue Idx = Op.getOperand(2);
6770
6771 if (VecVT.getVectorElementType() == MVT::i1) {
6772 // FIXME: For now we just promote to an i8 vector and insert into that,
6773 // but this is probably not optimal.
6774 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
6775 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
6776 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
6777 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
6778 }
6779
6780 MVT ContainerVT = VecVT;
6781 // If the operand is a fixed-length vector, convert to a scalable one.
6782 if (VecVT.isFixedLengthVector()) {
6783 ContainerVT = getContainerForFixedLengthVector(VecVT);
6784 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6785 }
6786
6787 MVT XLenVT = Subtarget.getXLenVT();
6788
6789 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
6790 // Even i64-element vectors on RV32 can be lowered without scalar
6791 // legalization if the most-significant 32 bits of the value are not affected
6792 // by the sign-extension of the lower 32 bits.
6793 // TODO: We could also catch sign extensions of a 32-bit value.
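  // For example, a constant such as 5 sign-extends from its low 32 bits, so on
  // RV32 it can still take the single-register insert path below rather than
  // the two-part vslide1down sequence.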
6794 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
6795 const auto *CVal = cast<ConstantSDNode>(Val);
6796 if (isInt<32>(CVal->getSExtValue())) {
6797 IsLegalInsert = true;
6798 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
6799 }
6800 }
6801
6802 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
6803
6804 SDValue ValInVec;
6805
6806 if (IsLegalInsert) {
6807     unsigned Opc =
6808         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
6809 if (isNullConstant(Idx)) {
6810 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
6811 if (!VecVT.isFixedLengthVector())
6812 return Vec;
6813 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
6814 }
6815 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
6816 } else {
6817 // On RV32, i64-element vectors must be specially handled to place the
6818 // value at element 0, by using two vslide1down instructions in sequence on
6819 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
6820 // this.
6821 SDValue ValLo, ValHi;
6822 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
6823 MVT I32ContainerVT =
6824 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
6825 SDValue I32Mask =
6826 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
6827 // Limit the active VL to two.
6828 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
6829 // If the Idx is 0 we can insert directly into the vector.
6830 if (isNullConstant(Idx)) {
6831 // First slide in the lo value, then the hi in above it. We use slide1down
6832 // to avoid the register group overlap constraint of vslide1up.
6833 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6834 Vec, Vec, ValLo, I32Mask, InsertI64VL);
6835 // If the source vector is undef don't pass along the tail elements from
6836 // the previous slide1down.
6837 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
6838 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6839 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
6840 // Bitcast back to the right container type.
6841 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
6842
6843 if (!VecVT.isFixedLengthVector())
6844 return ValInVec;
6845 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
6846 }
6847
6848 // First slide in the lo value, then the hi in above it. We use slide1down
6849 // to avoid the register group overlap constraint of vslide1up.
6850 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6851 DAG.getUNDEF(I32ContainerVT),
6852 DAG.getUNDEF(I32ContainerVT), ValLo,
6853 I32Mask, InsertI64VL);
6854 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6855 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
6856 I32Mask, InsertI64VL);
6857 // Bitcast back to the right container type.
6858 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
6859 }
6860
6861 // Now that the value is in a vector, slide it into position.
6862 SDValue InsertVL =
6863 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
6864
6865   // Use tail agnostic policy if Idx is the last index of Vec.
6866   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
6867 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
6868 cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
6869 VecVT.getVectorNumElements())
6870 Policy = RISCVII::TAIL_AGNOSTIC;
6871 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
6872 Idx, Mask, InsertVL, Policy);
6873 if (!VecVT.isFixedLengthVector())
6874 return Slideup;
6875 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
6876}
6877
6878// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
6879// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
6880// types this is done using VMV_X_S to allow us to glean information about the
6881// sign bits of the result.
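// For example, extracting element 3 of a v4i32 is emitted (roughly) as a
// vslidedown by 3 with VL = 1, followed by vmv.x.s to move element 0 of the
// slid-down vector into a GPR.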
6882SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6883 SelectionDAG &DAG) const {
6884 SDLoc DL(Op);
6885 SDValue Idx = Op.getOperand(1);
6886 SDValue Vec = Op.getOperand(0);
6887 EVT EltVT = Op.getValueType();
6888 MVT VecVT = Vec.getSimpleValueType();
6889 MVT XLenVT = Subtarget.getXLenVT();
6890
6891 if (VecVT.getVectorElementType() == MVT::i1) {
6892 // Use vfirst.m to extract the first bit.
6893 if (isNullConstant(Idx)) {
6894 MVT ContainerVT = VecVT;
6895 if (VecVT.isFixedLengthVector()) {
6896 ContainerVT = getContainerForFixedLengthVector(VecVT);
6897 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6898 }
6899 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
6900 SDValue Vfirst =
6901 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
6902 return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
6903 ISD::SETEQ);
6904 }
6905 if (VecVT.isFixedLengthVector()) {
6906 unsigned NumElts = VecVT.getVectorNumElements();
6907 if (NumElts >= 8) {
6908 MVT WideEltVT;
6909 unsigned WidenVecLen;
6910 SDValue ExtractElementIdx;
6911 SDValue ExtractBitIdx;
6912 unsigned MaxEEW = Subtarget.getELEN();
6913 MVT LargestEltVT = MVT::getIntegerVT(
6914 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
6915 if (NumElts <= LargestEltVT.getSizeInBits()) {
6916 assert(isPowerOf2_32(NumElts) &&
6917 "the number of elements should be power of 2");
6918 WideEltVT = MVT::getIntegerVT(NumElts);
6919 WidenVecLen = 1;
6920 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
6921 ExtractBitIdx = Idx;
6922 } else {
6923 WideEltVT = LargestEltVT;
6924 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
6925 // extract element index = index / element width
6926 ExtractElementIdx = DAG.getNode(
6927 ISD::SRL, DL, XLenVT, Idx,
6928 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
6929 // mask bit index = index % element width
6930 ExtractBitIdx = DAG.getNode(
6931 ISD::AND, DL, XLenVT, Idx,
6932 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
6933 }
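        // For example, extracting bit Idx from a v128i1 with XLEN = ELEN = 64
        // bitcasts the mask to v2i64 and then reads bit (Idx & 63) of element
        // (Idx >> 6).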
6934 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
6935 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
6936 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
6937 Vec, ExtractElementIdx);
6938 // Extract the bit from GPR.
6939 SDValue ShiftRight =
6940 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
6941 return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
6942 DAG.getConstant(1, DL, XLenVT));
6943 }
6944 }
6945 // Otherwise, promote to an i8 vector and extract from that.
6946 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
6947 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
6948 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
6949 }
6950
6951 // If this is a fixed vector, we need to convert it to a scalable vector.
6952 MVT ContainerVT = VecVT;
6953 if (VecVT.isFixedLengthVector()) {
6954 ContainerVT = getContainerForFixedLengthVector(VecVT);
6955 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6956 }
6957
6958 // If the index is 0, the vector is already in the right position.
6959 if (!isNullConstant(Idx)) {
6960 // Use a VL of 1 to avoid processing more elements than we need.
6961 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
6962 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
6963 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
6964 }
6965
6966 if (!EltVT.isInteger()) {
6967 // Floating-point extracts are handled in TableGen.
6968 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6969 DAG.getConstant(0, DL, XLenVT));
6970 }
6971
6972 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6973 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
6974}
6975
6976// Some RVV intrinsics may claim that they want an integer operand to be
6977// promoted or expanded.
6978 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
6979                                            const RISCVSubtarget &Subtarget) {
6980 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
6981 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
6982 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
6983 "Unexpected opcode");
6984
6985 if (!Subtarget.hasVInstructions())
6986 return SDValue();
6987
6988 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
6989 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
6990 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
6991
6992 SDLoc DL(Op);
6993
6994   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
6995       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
6996 if (!II || !II->hasScalarOperand())
6997 return SDValue();
6998
6999 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
7000 assert(SplatOp < Op.getNumOperands());
7001
7002 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
7003 SDValue &ScalarOp = Operands[SplatOp];
7004 MVT OpVT = ScalarOp.getSimpleValueType();
7005 MVT XLenVT = Subtarget.getXLenVT();
7006
7007 // If this isn't a scalar, or its type is XLenVT we're done.
7008 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
7009 return SDValue();
7010
7011 // Simplest case is that the operand needs to be promoted to XLenVT.
7012 if (OpVT.bitsLT(XLenVT)) {
7013 // If the operand is a constant, sign extend to increase our chances
7014     // of being able to use a .vi instruction. ANY_EXTEND would become
7015 // a zero extend and the simm5 check in isel would fail.
7016 // FIXME: Should we ignore the upper bits in isel instead?
7017     unsigned ExtOpc =
7018         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
7019 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
7020 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7021 }
7022
7023 // Use the previous operand to get the vXi64 VT. The result might be a mask
7024 // VT for compares. Using the previous operand assumes that the previous
7025 // operand will never have a smaller element size than a scalar operand and
7026 // that a widening operation never uses SEW=64.
7027 // NOTE: If this fails the below assert, we can probably just find the
7028 // element count from any operand or result and use it to construct the VT.
7029 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
7030 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
7031
7032 // The more complex case is when the scalar is larger than XLenVT.
7033 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
7034 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
7035
7036 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
7037 // instruction to sign-extend since SEW>XLEN.
7038 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
7039 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
7040 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7041 }
7042
7043 switch (IntNo) {
7044 case Intrinsic::riscv_vslide1up:
7045 case Intrinsic::riscv_vslide1down:
7046 case Intrinsic::riscv_vslide1up_mask:
7047 case Intrinsic::riscv_vslide1down_mask: {
7048 // We need to special case these when the scalar is larger than XLen.
7049 unsigned NumOps = Op.getNumOperands();
7050 bool IsMasked = NumOps == 7;
7051
7052 // Convert the vector source to the equivalent nxvXi32 vector.
7053 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
7054 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
7055 SDValue ScalarLo, ScalarHi;
7056 std::tie(ScalarLo, ScalarHi) =
7057 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
7058
7059 // Double the VL since we halved SEW.
7060 SDValue AVL = getVLOperand(Op);
7061 SDValue I32VL;
7062
7063 // Optimize for constant AVL
7064 if (isa<ConstantSDNode>(AVL)) {
7065 unsigned EltSize = VT.getScalarSizeInBits();
7066 unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
7067
7068 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
7069 unsigned MaxVLMAX =
7070 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
7071
7072 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
7073 unsigned MinVLMAX =
7074 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
7075
7076 uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
7077 if (AVLInt <= MinVLMAX) {
7078 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
7079 } else if (AVLInt >= 2 * MaxVLMAX) {
7080 // Just set vl to VLMAX in this situation
7081         RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
7082         SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
7083 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
7084 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
7085 SDValue SETVLMAX = DAG.getTargetConstant(
7086 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
7087 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
7088 LMUL);
7089 } else {
7090 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
7091 // is related to the hardware implementation.
7092         // So let the following code handle it.
7093 }
7094 }
7095 if (!I32VL) {
7096       RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
7097       SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
7098 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
7099 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
7100 SDValue SETVL =
7101 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
7102     // Use a vsetvli instruction to get the vector length that will actually
7103     // be used, which depends on the hardware implementation.
7104 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
7105 SEW, LMUL);
7106 I32VL =
7107 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
7108 }
7109
7110 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
7111
7112 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
7113 // instructions.
7114 SDValue Passthru;
7115 if (IsMasked)
7116 Passthru = DAG.getUNDEF(I32VT);
7117 else
7118 Passthru = DAG.getBitcast(I32VT, Operands[1]);
7119
7120 if (IntNo == Intrinsic::riscv_vslide1up ||
7121 IntNo == Intrinsic::riscv_vslide1up_mask) {
7122 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
7123 ScalarHi, I32Mask, I32VL);
7124 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
7125 ScalarLo, I32Mask, I32VL);
7126 } else {
7127 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
7128 ScalarLo, I32Mask, I32VL);
7129 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
7130 ScalarHi, I32Mask, I32VL);
7131 }
7132
7133 // Convert back to nxvXi64.
7134 Vec = DAG.getBitcast(VT, Vec);
7135
7136 if (!IsMasked)
7137 return Vec;
7138 // Apply mask after the operation.
7139 SDValue Mask = Operands[NumOps - 3];
7140 SDValue MaskedOff = Operands[1];
7141 // Assume Policy operand is the last operand.
7142 uint64_t Policy =
7143 cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
7144 // We don't need to select maskedoff if it's undef.
7145 if (MaskedOff.isUndef())
7146 return Vec;
7147 // TAMU
7148 if (Policy == RISCVII::TAIL_AGNOSTIC)
7149 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
7150 AVL);
7151 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
7152     // It's fine because vmerge does not care about the mask policy.
7153 return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
7154 AVL);
7155 }
7156 }
7157
7158 // We need to convert the scalar to a splat vector.
7159 SDValue VL = getVLOperand(Op);
7160 assert(VL.getValueType() == XLenVT);
7161 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
7162 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7163}
7164
7165// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
7166// scalable vector llvm.get.vector.length for now.
7167//
7168// We need to convert from a scalable VF to a vsetvli with VLMax equal to
7169// (vscale * VF). The vscale and VF are independent of element width. We use
7170// SEW=8 for the vsetvli because it is the only element width that supports all
7171 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
7172 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
7173// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
7174// SEW and LMUL are better for the surrounding vector instructions.
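// For example, with RVVBitsPerBlock = 64, a request for a scalable VF of 4 uses
// SEW=8 with LMUL=1/2, since (VLEN/8) * 1/2 = vscale * 4; the AVL operand is
// simply zero-extended and passed through to the vsetvli.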
7175 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
7176                                     const RISCVSubtarget &Subtarget) {
7177 MVT XLenVT = Subtarget.getXLenVT();
7178
7179 // The smallest LMUL is only valid for the smallest element width.
7180 const unsigned ElementWidth = 8;
7181
7182 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
7183 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
7184 // We don't support VF==1 with ELEN==32.
7185 unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
7186
7187 unsigned VF = N->getConstantOperandVal(2);
7188 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
7189 "Unexpected VF");
7190 (void)MinVF;
7191
7192 bool Fractional = VF < LMul1VF;
7193 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
7194 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
7195 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
7196
7197 SDLoc DL(N);
7198
7199 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
7200 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
7201
7202 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
7203
7204 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
7205 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
7206}
7207
7208SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7209 SelectionDAG &DAG) const {
7210 unsigned IntNo = Op.getConstantOperandVal(0);
7211 SDLoc DL(Op);
7212 MVT XLenVT = Subtarget.getXLenVT();
7213
7214 switch (IntNo) {
7215 default:
7216 break; // Don't custom lower most intrinsics.
7217 case Intrinsic::thread_pointer: {
7218 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7219 return DAG.getRegister(RISCV::X4, PtrVT);
7220 }
7221 case Intrinsic::riscv_orc_b:
7222 case Intrinsic::riscv_brev8:
7223 case Intrinsic::riscv_sha256sig0:
7224 case Intrinsic::riscv_sha256sig1:
7225 case Intrinsic::riscv_sha256sum0:
7226 case Intrinsic::riscv_sha256sum1:
7227 case Intrinsic::riscv_sm3p0:
7228 case Intrinsic::riscv_sm3p1: {
7229 unsigned Opc;
7230 switch (IntNo) {
7231 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
7232 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
7233 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
7234 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
7235 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
7236 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
7237 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
7238 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
7239 }
7240
7241 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
7242 }
7243 case Intrinsic::riscv_sm4ks:
7244 case Intrinsic::riscv_sm4ed: {
7245 unsigned Opc =
7246 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
7247 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
7248 Op.getOperand(3));
7249 }
7250 case Intrinsic::riscv_zip:
7251 case Intrinsic::riscv_unzip: {
7252 unsigned Opc =
7253 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
7254 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
7255 }
7256 case Intrinsic::riscv_clmul:
7257 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
7258 Op.getOperand(2));
7259 case Intrinsic::riscv_clmulh:
7260 return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
7261 Op.getOperand(2));
7262 case Intrinsic::riscv_clmulr:
7263 return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
7264 Op.getOperand(2));
7265 case Intrinsic::experimental_get_vector_length:
7266 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
7267 case Intrinsic::riscv_vmv_x_s:
7268 assert(Op.getValueType() == XLenVT && "Unexpected VT!");
7269 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
7270 Op.getOperand(1));
7271 case Intrinsic::riscv_vfmv_f_s:
7272 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
7273 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
7274 case Intrinsic::riscv_vmv_v_x:
7275 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
7276 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
7277 Subtarget);
7278 case Intrinsic::riscv_vfmv_v_f:
7279 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
7280 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
7281 case Intrinsic::riscv_vmv_s_x: {
7282 SDValue Scalar = Op.getOperand(2);
7283
7284 if (Scalar.getValueType().bitsLE(XLenVT)) {
7285 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
7286 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
7287 Op.getOperand(1), Scalar, Op.getOperand(3));
7288 }
7289
7290 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
7291
7292 // This is an i64 value that lives in two scalar registers. We have to
7293 // insert this in a convoluted way. First we build vXi64 splat containing
7294 // the two values that we assemble using some bit math. Next we'll use
7295 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
7296 // to merge element 0 from our splat into the source vector.
7297 // FIXME: This is probably not the best way to do this, but it is
7298 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
7299 // point.
7300 // sw lo, (a0)
7301 // sw hi, 4(a0)
7302 // vlse vX, (a0)
7303 //
7304 // vid.v vVid
7305 // vmseq.vx mMask, vVid, 0
7306 // vmerge.vvm vDest, vSrc, vVal, mMask
7307 MVT VT = Op.getSimpleValueType();
7308 SDValue Vec = Op.getOperand(1);
7309 SDValue VL = getVLOperand(Op);
7310
7311 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
7312 if (Op.getOperand(1).isUndef())
7313 return SplattedVal;
7314 SDValue SplattedIdx =
7315 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
7316 DAG.getConstant(0, DL, MVT::i32), VL);
7317
7318 MVT MaskVT = getMaskTypeFor(VT);
7319 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
7320 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
7321 SDValue SelectCond =
7322 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
7323 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
7324 DAG.getUNDEF(MaskVT), Mask, VL});
7325 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
7326 Vec, VL);
7327 }
7328 }
7329
7330 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
7331}
7332
7333SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
7334 SelectionDAG &DAG) const {
7335 unsigned IntNo = Op.getConstantOperandVal(1);
7336 switch (IntNo) {
7337 default:
7338 break;
7339 case Intrinsic::riscv_masked_strided_load: {
7340 SDLoc DL(Op);
7341 MVT XLenVT = Subtarget.getXLenVT();
7342
7343 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
7344 // the selection of the masked intrinsics doesn't do this for us.
7345 SDValue Mask = Op.getOperand(5);
7346 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
7347
7348 MVT VT = Op->getSimpleValueType(0);
7349 MVT ContainerVT = VT;
7350 if (VT.isFixedLengthVector())
7351 ContainerVT = getContainerForFixedLengthVector(VT);
7352
7353 SDValue PassThru = Op.getOperand(2);
7354 if (!IsUnmasked) {
7355 MVT MaskVT = getMaskTypeFor(ContainerVT);
7356 if (VT.isFixedLengthVector()) {
7357 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7358 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
7359 }
7360 }
7361
7362     auto *Load = cast<MemIntrinsicSDNode>(Op);
7363     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7364 SDValue Ptr = Op.getOperand(3);
7365 SDValue Stride = Op.getOperand(4);
7366 SDValue Result, Chain;
7367
7368 // TODO: We restrict this to unmasked loads currently in consideration of
7369     // the complexity of handling all-false masks.
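    // A zero-stride unmasked load reads the same address for every element, so
    // it can be lowered as a single scalar load that is then splatted across
    // the result vector.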
7370 if (IsUnmasked && isNullConstant(Stride)) {
7371 MVT ScalarVT = ContainerVT.getVectorElementType();
7372 SDValue ScalarLoad =
7373 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
7374 ScalarVT, Load->getMemOperand());
7375 Chain = ScalarLoad.getValue(1);
7376 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
7377 Subtarget);
7378 } else {
7379 SDValue IntID = DAG.getTargetConstant(
7380 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
7381 XLenVT);
7382
7383 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
7384 if (IsUnmasked)
7385 Ops.push_back(DAG.getUNDEF(ContainerVT));
7386 else
7387 Ops.push_back(PassThru);
7388 Ops.push_back(Ptr);
7389 Ops.push_back(Stride);
7390 if (!IsUnmasked)
7391 Ops.push_back(Mask);
7392 Ops.push_back(VL);
7393 if (!IsUnmasked) {
7394       SDValue Policy =
7395           DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
7396 Ops.push_back(Policy);
7397 }
7398
7399 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
7400     Result =
7401         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
7402 Load->getMemoryVT(), Load->getMemOperand());
7403 Chain = Result.getValue(1);
7404 }
7405 if (VT.isFixedLengthVector())
7406 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
7407 return DAG.getMergeValues({Result, Chain}, DL);
7408 }
7409 case Intrinsic::riscv_seg2_load:
7410 case Intrinsic::riscv_seg3_load:
7411 case Intrinsic::riscv_seg4_load:
7412 case Intrinsic::riscv_seg5_load:
7413 case Intrinsic::riscv_seg6_load:
7414 case Intrinsic::riscv_seg7_load:
7415 case Intrinsic::riscv_seg8_load: {
7416 SDLoc DL(Op);
7417 static const Intrinsic::ID VlsegInts[7] = {
7418 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
7419 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
7420 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
7421 Intrinsic::riscv_vlseg8};
7422 unsigned NF = Op->getNumValues() - 1;
7423 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
7424 MVT XLenVT = Subtarget.getXLenVT();
7425 MVT VT = Op->getSimpleValueType(0);
7426 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7427
7428 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
7429 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
7430     auto *Load = cast<MemIntrinsicSDNode>(Op);
7431     SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
7432 ContainerVTs.push_back(MVT::Other);
7433 SDVTList VTs = DAG.getVTList(ContainerVTs);
7434 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
7435 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
7436 Ops.push_back(Op.getOperand(2));
7437 Ops.push_back(VL);
7438 SDValue Result =
7439         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
7440                                 Load->getMemoryVT(), Load->getMemOperand());
7441     SmallVector<SDValue, 9> Results;
7442     for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
7443 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
7444 DAG, Subtarget));
7445 Results.push_back(Result.getValue(NF));
7446 return DAG.getMergeValues(Results, DL);
7447 }
7448 }
7449
7450 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
7451}
7452
7453SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
7454 SelectionDAG &DAG) const {
7455 unsigned IntNo = Op.getConstantOperandVal(1);
7456 switch (IntNo) {
7457 default:
7458 break;
7459 case Intrinsic::riscv_masked_strided_store: {
7460 SDLoc DL(Op);
7461 MVT XLenVT = Subtarget.getXLenVT();
7462
7463 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
7464 // the selection of the masked intrinsics doesn't do this for us.
7465 SDValue Mask = Op.getOperand(5);
7466 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
7467
7468 SDValue Val = Op.getOperand(2);
7469 MVT VT = Val.getSimpleValueType();
7470 MVT ContainerVT = VT;
7471 if (VT.isFixedLengthVector()) {
7472 ContainerVT = getContainerForFixedLengthVector(VT);
7473 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
7474 }
7475 if (!IsUnmasked) {
7476 MVT MaskVT = getMaskTypeFor(ContainerVT);
7477 if (VT.isFixedLengthVector())
7478 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7479 }
7480
7481 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7482
7483 SDValue IntID = DAG.getTargetConstant(
7484 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
7485 XLenVT);
7486
7487     auto *Store = cast<MemIntrinsicSDNode>(Op);
7488     SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
7489 Ops.push_back(Val);
7490 Ops.push_back(Op.getOperand(3)); // Ptr
7491 Ops.push_back(Op.getOperand(4)); // Stride
7492 if (!IsUnmasked)
7493 Ops.push_back(Mask);
7494 Ops.push_back(VL);
7495
7496 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
7497 Ops, Store->getMemoryVT(),
7498 Store->getMemOperand());
7499 }
7500 case Intrinsic::riscv_seg2_store:
7501 case Intrinsic::riscv_seg3_store:
7502 case Intrinsic::riscv_seg4_store:
7503 case Intrinsic::riscv_seg5_store:
7504 case Intrinsic::riscv_seg6_store:
7505 case Intrinsic::riscv_seg7_store:
7506 case Intrinsic::riscv_seg8_store: {
7507 SDLoc DL(Op);
7508 static const Intrinsic::ID VssegInts[] = {
7509 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
7510 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
7511 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
7512 Intrinsic::riscv_vsseg8};
7513 // Operands are (chain, int_id, vec*, ptr, vl)
7514 unsigned NF = Op->getNumOperands() - 4;
7515 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
7516 MVT XLenVT = Subtarget.getXLenVT();
7517 MVT VT = Op->getOperand(2).getSimpleValueType();
7518 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7519
7520 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
7521 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
7522 SDValue Ptr = Op->getOperand(NF + 2);
7523
7524 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
7525 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
7526 for (unsigned i = 0; i < NF; i++)
7527       Ops.push_back(convertToScalableVector(
7528           ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
7529 Ops.append({Ptr, VL});
7530
7531 return DAG.getMemIntrinsicNode(
7532 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
7533 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
7534 }
7535 }
7536
7537 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
7538}
7539
7540static unsigned getRVVReductionOp(unsigned ISDOpcode) {
7541 switch (ISDOpcode) {
7542 default:
7543 llvm_unreachable("Unhandled reduction");
7544   case ISD::VECREDUCE_ADD:
7545     return RISCVISD::VECREDUCE_ADD_VL;
7546   case ISD::VECREDUCE_UMAX:
7547     return RISCVISD::VECREDUCE_UMAX_VL;
7548   case ISD::VECREDUCE_SMAX:
7549     return RISCVISD::VECREDUCE_SMAX_VL;
7550   case ISD::VECREDUCE_UMIN:
7551     return RISCVISD::VECREDUCE_UMIN_VL;
7552   case ISD::VECREDUCE_SMIN:
7553     return RISCVISD::VECREDUCE_SMIN_VL;
7554   case ISD::VECREDUCE_AND:
7555     return RISCVISD::VECREDUCE_AND_VL;
7556   case ISD::VECREDUCE_OR:
7557     return RISCVISD::VECREDUCE_OR_VL;
7558   case ISD::VECREDUCE_XOR:
7559     return RISCVISD::VECREDUCE_XOR_VL;
7560 }
7561}
7562
7563SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
7564 SelectionDAG &DAG,
7565 bool IsVP) const {
7566 SDLoc DL(Op);
7567 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
7568 MVT VecVT = Vec.getSimpleValueType();
7569 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
7570 Op.getOpcode() == ISD::VECREDUCE_OR ||
7571 Op.getOpcode() == ISD::VECREDUCE_XOR ||
7572 Op.getOpcode() == ISD::VP_REDUCE_AND ||
7573 Op.getOpcode() == ISD::VP_REDUCE_OR ||
7574 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
7575 "Unexpected reduction lowering");
7576
7577 MVT XLenVT = Subtarget.getXLenVT();
7578 assert(Op.getValueType() == XLenVT &&
7579 "Expected reduction output to be legalized to XLenVT");
7580
7581 MVT ContainerVT = VecVT;
7582 if (VecVT.isFixedLengthVector()) {
7583 ContainerVT = getContainerForFixedLengthVector(VecVT);
7584 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7585 }
7586
7587 SDValue Mask, VL;
7588 if (IsVP) {
7589 Mask = Op.getOperand(2);
7590 VL = Op.getOperand(3);
7591 } else {
7592 std::tie(Mask, VL) =
7593 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7594 }
7595
7596   unsigned BaseOpc;
7597   ISD::CondCode CC;
7598 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7599
7600 switch (Op.getOpcode()) {
7601 default:
7602 llvm_unreachable("Unhandled reduction");
7603 case ISD::VECREDUCE_AND:
7604 case ISD::VP_REDUCE_AND: {
7605 // vcpop ~x == 0
7606 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
7607 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
7608 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
7609 CC = ISD::SETEQ;
7610 BaseOpc = ISD::AND;
7611 break;
7612 }
7613 case ISD::VECREDUCE_OR:
7614 case ISD::VP_REDUCE_OR:
7615 // vcpop x != 0
7616 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
7617 CC = ISD::SETNE;
7618 BaseOpc = ISD::OR;
7619 break;
7620 case ISD::VECREDUCE_XOR:
7621 case ISD::VP_REDUCE_XOR: {
7622 // ((vcpop x) & 1) != 0
7623 SDValue One = DAG.getConstant(1, DL, XLenVT);
7624 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
7625 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
7626 CC = ISD::SETNE;
7627 BaseOpc = ISD::XOR;
7628 break;
7629 }
7630 }
7631
7632 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
7633
7634 if (!IsVP)
7635 return SetCC;
7636
7637 // Now include the start value in the operation.
7638 // Note that we must return the start value when no elements are operated
7639 // upon. The vcpop instructions we've emitted in each case above will return
7640 // 0 for an inactive vector, and so we've already received the neutral value:
7641 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
7642 // can simply include the start value.
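  // For example, a VP_REDUCE_AND produces ((vcpop(~x) == 0) & start), which
  // yields the start value when VL is zero because vcpop returns 0.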
7643 return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
7644}
7645
7646static bool isNonZeroAVL(SDValue AVL) {
7647 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
7648 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
7649 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
7650 (ImmAVL && ImmAVL->getZExtValue() >= 1);
7651}
7652
7653/// Helper to lower a reduction sequence of the form:
7654/// scalar = reduce_op vec, scalar_start
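/// For example, a vecreduce.add is lowered (roughly) by placing the scalar
/// start value in element 0 of an LMUL=1 vector, performing a vredsum.vs of the
/// source vector against it, and extracting element 0 of the result.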
7655static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
7656 SDValue StartValue, SDValue Vec, SDValue Mask,
7657 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
7658 const RISCVSubtarget &Subtarget) {
7659 const MVT VecVT = Vec.getSimpleValueType();
7660 const MVT M1VT = getLMUL1VT(VecVT);
7661 const MVT XLenVT = Subtarget.getXLenVT();
7662 const bool NonZeroAVL = isNonZeroAVL(VL);
7663
7664 // The reduction needs an LMUL1 input; do the splat at either LMUL1
7665 // or the original VT if fractional.
7666 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
7667 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
7668 // prove it is non-zero. For the AVL=0 case, we need the scalar to
7669 // be the result of the reduction operation.
7670 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
7671 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
7672 DAG, Subtarget);
7673 if (M1VT != InnerVT)
7674 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
7675 DAG.getUNDEF(M1VT),
7676 InitialValue, DAG.getConstant(0, DL, XLenVT));
7677 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
7678 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
7679 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
7680 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
7681 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
7682 DAG.getConstant(0, DL, XLenVT));
7683}
7684
7685SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
7686 SelectionDAG &DAG) const {
7687 SDLoc DL(Op);
7688 SDValue Vec = Op.getOperand(0);
7689 EVT VecEVT = Vec.getValueType();
7690
7691 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
7692
7693 // Due to ordering in legalize types we may have a vector type that needs to
7694 // be split. Do that manually so we can get down to a legal type.
7695   while (getTypeAction(*DAG.getContext(), VecEVT) ==
7696          TargetLowering::TypeSplitVector) {
7697 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
7698 VecEVT = Lo.getValueType();
7699 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
7700 }
7701
7702 // TODO: The type may need to be widened rather than split. Or widened before
7703 // it can be split.
7704 if (!isTypeLegal(VecEVT))
7705 return SDValue();
7706
7707 MVT VecVT = VecEVT.getSimpleVT();
7708 MVT VecEltVT = VecVT.getVectorElementType();
7709 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
7710
7711 MVT ContainerVT = VecVT;
7712 if (VecVT.isFixedLengthVector()) {
7713 ContainerVT = getContainerForFixedLengthVector(VecVT);
7714 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7715 }
7716
7717 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7718
7719 SDValue NeutralElem =
7720 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
7721 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec,
7722 Mask, VL, DL, DAG, Subtarget);
7723}
7724
7725// Given a reduction op, this function returns the matching reduction opcode,
7726// the vector SDValue and the scalar SDValue required to lower this to a
7727// RISCVISD node.
7728 static std::tuple<unsigned, SDValue, SDValue>
7729 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
7730 SDLoc DL(Op);
7731 auto Flags = Op->getFlags();
7732 unsigned Opcode = Op.getOpcode();
7733 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
7734 switch (Opcode) {
7735 default:
7736 llvm_unreachable("Unhandled reduction");
7737 case ISD::VECREDUCE_FADD: {
7738 // Use positive zero if we can. It is cheaper to materialize.
7739 SDValue Zero =
7740 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
7741 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
7742 }
7743 case ISD::VECREDUCE_SEQ_FADD:
7744 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
7745 Op.getOperand(0));
7746 case ISD::VECREDUCE_FMIN:
7747 return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
7748 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
7749 case ISD::VECREDUCE_FMAX:
7750 return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
7751 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
7752 }
7753}
7754
7755SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
7756 SelectionDAG &DAG) const {
7757 SDLoc DL(Op);
7758 MVT VecEltVT = Op.getSimpleValueType();
7759
7760 unsigned RVVOpcode;
7761 SDValue VectorVal, ScalarVal;
7762 std::tie(RVVOpcode, VectorVal, ScalarVal) =
7763 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
7764 MVT VecVT = VectorVal.getSimpleValueType();
7765
7766 MVT ContainerVT = VecVT;
7767 if (VecVT.isFixedLengthVector()) {
7768 ContainerVT = getContainerForFixedLengthVector(VecVT);
7769 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
7770 }
7771
7772 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7773 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
7774 VectorVal, Mask, VL, DL, DAG, Subtarget);
7775}
7776
7777static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
7778 switch (ISDOpcode) {
7779 default:
7780 llvm_unreachable("Unhandled reduction");
7781   case ISD::VP_REDUCE_ADD:
7782     return RISCVISD::VECREDUCE_ADD_VL;
7783   case ISD::VP_REDUCE_UMAX:
7784     return RISCVISD::VECREDUCE_UMAX_VL;
7785   case ISD::VP_REDUCE_SMAX:
7786     return RISCVISD::VECREDUCE_SMAX_VL;
7787   case ISD::VP_REDUCE_UMIN:
7788     return RISCVISD::VECREDUCE_UMIN_VL;
7789   case ISD::VP_REDUCE_SMIN:
7790     return RISCVISD::VECREDUCE_SMIN_VL;
7791   case ISD::VP_REDUCE_AND:
7792     return RISCVISD::VECREDUCE_AND_VL;
7793   case ISD::VP_REDUCE_OR:
7794     return RISCVISD::VECREDUCE_OR_VL;
7795   case ISD::VP_REDUCE_XOR:
7796     return RISCVISD::VECREDUCE_XOR_VL;
7797   case ISD::VP_REDUCE_FADD:
7798     return RISCVISD::VECREDUCE_FADD_VL;
7799   case ISD::VP_REDUCE_SEQ_FADD:
7800     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
7801   case ISD::VP_REDUCE_FMAX:
7802     return RISCVISD::VECREDUCE_FMAX_VL;
7803   case ISD::VP_REDUCE_FMIN:
7804     return RISCVISD::VECREDUCE_FMIN_VL;
7805 }
7806}
7807
7808SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
7809 SelectionDAG &DAG) const {
7810 SDLoc DL(Op);
7811 SDValue Vec = Op.getOperand(1);
7812 EVT VecEVT = Vec.getValueType();
7813
7814 // TODO: The type may need to be widened rather than split. Or widened before
7815 // it can be split.
7816 if (!isTypeLegal(VecEVT))
7817 return SDValue();
7818
7819 MVT VecVT = VecEVT.getSimpleVT();
7820 unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
7821
7822 if (VecVT.isFixedLengthVector()) {
7823 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
7824 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7825 }
7826
7827 SDValue VL = Op.getOperand(3);
7828 SDValue Mask = Op.getOperand(2);
7829 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
7830 Vec, Mask, VL, DL, DAG, Subtarget);
7831}
7832
7833SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
7834 SelectionDAG &DAG) const {
7835 SDValue Vec = Op.getOperand(0);
7836 SDValue SubVec = Op.getOperand(1);
7837 MVT VecVT = Vec.getSimpleValueType();
7838 MVT SubVecVT = SubVec.getSimpleValueType();
7839
7840 SDLoc DL(Op);
7841 MVT XLenVT = Subtarget.getXLenVT();
7842 unsigned OrigIdx = Op.getConstantOperandVal(2);
7843 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
7844
7845 // We don't have the ability to slide mask vectors up indexed by their i1
7846 // elements; the smallest we can do is i8. Often we are able to bitcast to
7847 // equivalent i8 vectors. Note that when inserting a fixed-length vector
7848 // into a scalable one, we might not necessarily have enough scalable
7849 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
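  // For example, inserting a v16i1 subvector at index 16 of an nxv32i1 vector
  // is instead performed as inserting a v2i8 subvector at index 2 of nxv4i8.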
7850 if (SubVecVT.getVectorElementType() == MVT::i1 &&
7851 (OrigIdx != 0 || !Vec.isUndef())) {
7852 if (VecVT.getVectorMinNumElements() >= 8 &&
7853 SubVecVT.getVectorMinNumElements() >= 8) {
7854 assert(OrigIdx % 8 == 0 && "Invalid index");
7855 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
7856 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
7857 "Unexpected mask vector lowering");
7858 OrigIdx /= 8;
7859 SubVecVT =
7860 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
7861 SubVecVT.isScalableVector());
7862 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
7863 VecVT.isScalableVector());
7864 Vec = DAG.getBitcast(VecVT, Vec);
7865 SubVec = DAG.getBitcast(SubVecVT, SubVec);
7866 } else {
7867 // We can't slide this mask vector up indexed by its i1 elements.
7868 // This poses a problem when we wish to insert a scalable vector which
7869 // can't be re-expressed as a larger type. Just choose the slow path and
7870 // extend to a larger type, then truncate back down.
7871 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
7872 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
7873 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
7874 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
7875 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
7876 Op.getOperand(2));
7877 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
7878 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
7879 }
7880 }
7881
7882 // If the subvector vector is a fixed-length type, we cannot use subregister
7883 // manipulation to simplify the codegen; we don't know which register of a
7884 // LMUL group contains the specific subvector as we only know the minimum
7885 // register size. Therefore we must slide the vector group up the full
7886 // amount.
7887 if (SubVecVT.isFixedLengthVector()) {
7888 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
7889 return Op;
7890 MVT ContainerVT = VecVT;
7891 if (VecVT.isFixedLengthVector()) {
7892 ContainerVT = getContainerForFixedLengthVector(VecVT);
7893 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7894 }
7895 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
7896 DAG.getUNDEF(ContainerVT), SubVec,
7897 DAG.getConstant(0, DL, XLenVT));
7898 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
7899 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
7900 return DAG.getBitcast(Op.getValueType(), SubVec);
7901 }
7902 SDValue Mask =
7903 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
7904 // Set the vector length to only the number of elements we care about. Note
7905 // that for slideup this includes the offset.
7906 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
7907 SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
7908
7909     // Use tail agnostic policy if we're inserting over Vec's tail.
7910     unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
7911 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
7912 Policy = RISCVII::TAIL_AGNOSTIC;
7913
7914 // If we're inserting into the lowest elements, use a tail undisturbed
7915 // vmv.v.v.
7916 if (OrigIdx == 0) {
7917 SubVec =
7918 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
7919 } else {
7920 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
7921 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
7922 SlideupAmt, Mask, VL, Policy);
7923 }
7924
7925 if (VecVT.isFixedLengthVector())
7926 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
7927 return DAG.getBitcast(Op.getValueType(), SubVec);
7928 }
7929
7930 unsigned SubRegIdx, RemIdx;
7931   std::tie(SubRegIdx, RemIdx) =
7932       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
7933 VecVT, SubVecVT, OrigIdx, TRI);
7934
7935 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
7936 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
7937 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
7938 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
7939
7940 // 1. If the Idx has been completely eliminated and this subvector's size is
7941 // a vector register or a multiple thereof, or the surrounding elements are
7942 // undef, then this is a subvector insert which naturally aligns to a vector
7943 // register. These can easily be handled using subregister manipulation.
7944 // 2. If the subvector is smaller than a vector register, then the insertion
7945 // must preserve the undisturbed elements of the register. We do this by
7946 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
7947 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
7948 // subvector within the vector register, and an INSERT_SUBVECTOR of that
7949 // LMUL=1 type back into the larger vector (resolving to another subregister
7950 // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
7951 // to avoid allocating a large register group to hold our subvector.
7952 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
7953 return Op;
7954
7955 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
7956 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
7957 // (in our case undisturbed). This means we can set up a subvector insertion
7958 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
7959 // size of the subvector.
7960 MVT InterSubVT = VecVT;
7961 SDValue AlignedExtract = Vec;
7962 unsigned AlignedIdx = OrigIdx - RemIdx;
7963 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
7964 InterSubVT = getLMUL1VT(VecVT);
7965 // Extract a subvector equal to the nearest full vector register type. This
7966 // should resolve to an EXTRACT_SUBREG instruction.
7967 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
7968 DAG.getConstant(AlignedIdx, DL, XLenVT));
7969 }
7970
7971 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
7972 DAG.getUNDEF(InterSubVT), SubVec,
7973 DAG.getConstant(0, DL, XLenVT));
7974
7975 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
7976
7977 VL = computeVLMax(SubVecVT, DL, DAG);
7978
7979 // If we're inserting into the lowest elements, use a tail undisturbed
7980 // vmv.v.v.
7981 if (RemIdx == 0) {
7982 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
7983 SubVec, VL);
7984 } else {
7985 SDValue SlideupAmt =
7986 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
7987
7988 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
7989 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
7990
7991 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
7992 SlideupAmt, Mask, VL);
7993 }
7994
7995 // If required, insert this subvector back into the correct vector register.
7996 // This should resolve to an INSERT_SUBREG instruction.
7997 if (VecVT.bitsGT(InterSubVT))
7998 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
7999 DAG.getConstant(AlignedIdx, DL, XLenVT));
8000
8001 // We might have bitcast from a mask type: cast back to the original type if
8002 // required.
8003 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
8004}
8005
8006SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
8007 SelectionDAG &DAG) const {
8008 SDValue Vec = Op.getOperand(0);
8009 MVT SubVecVT = Op.getSimpleValueType();
8010 MVT VecVT = Vec.getSimpleValueType();
8011
8012 SDLoc DL(Op);
8013 MVT XLenVT = Subtarget.getXLenVT();
8014 unsigned OrigIdx = Op.getConstantOperandVal(1);
8015 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
8016
8017 // We don't have the ability to slide mask vectors down indexed by their i1
8018 // elements; the smallest we can do is i8. Often we are able to bitcast to
8019 // equivalent i8 vectors. Note that when extracting a fixed-length vector
8020 // from a scalable one, we might not necessarily have enough scalable
8021 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
8022 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
8023 if (VecVT.getVectorMinNumElements() >= 8 &&
8024 SubVecVT.getVectorMinNumElements() >= 8) {
8025 assert(OrigIdx % 8 == 0 && "Invalid index");
8026 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
8027 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
8028 "Unexpected mask vector lowering");
8029 OrigIdx /= 8;
8030 SubVecVT =
8031 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
8032 SubVecVT.isScalableVector());
8033 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
8034 VecVT.isScalableVector());
8035 Vec = DAG.getBitcast(VecVT, Vec);
8036 } else {
8037 // We can't slide this mask vector down, indexed by its i1 elements.
8038 // This poses a problem when we wish to extract a scalable vector which
8039 // can't be re-expressed as a larger type. Just choose the slow path and
8040 // extend to a larger type, then truncate back down.
8041 // TODO: We could probably improve this when extracting certain fixed-length
8042 // vectors from fixed-length vectors, where we can extract as i8 and shift the
8043 // correct element right to reach the desired subvector.
8044 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
8045 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
8046 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
8047 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
8048 Op.getOperand(1));
8049 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
8050 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
8051 }
8052 }
8053
8054 // If the subvector is a fixed-length type, we cannot use subregister
8055 // manipulation to simplify the codegen; we don't know which register of an
8056 // LMUL group contains the specific subvector, as we only know the minimum
8057 // register size. Therefore we must slide the vector group down the full
8058 // amount.
8059 if (SubVecVT.isFixedLengthVector()) {
8060 // With an index of 0 this is a cast-like subvector, which can be performed
8061 // with subregister operations.
8062 if (OrigIdx == 0)
8063 return Op;
8064 MVT ContainerVT = VecVT;
8065 if (VecVT.isFixedLengthVector()) {
8066 ContainerVT = getContainerForFixedLengthVector(VecVT);
8067 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8068 }
8069 SDValue Mask =
8070 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
8071 // Set the vector length to only the number of elements we care about. This
8072 // avoids sliding down elements we're going to discard straight away.
8073 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
8074 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
8075 SDValue Slidedown =
8076 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8077 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
8078 // Now we can use a cast-like subvector extract to get the result.
8079 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
8080 DAG.getConstant(0, DL, XLenVT));
8081 return DAG.getBitcast(Op.getValueType(), Slidedown);
8082 }
8083
8084 unsigned SubRegIdx, RemIdx;
8085 std::tie(SubRegIdx, RemIdx) =
8086 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
8087 VecVT, SubVecVT, OrigIdx, TRI);
8088
8089 // If the Idx has been completely eliminated then this is a subvector extract
8090 // which naturally aligns to a vector register. These can easily be handled
8091 // using subregister manipulation.
8092 if (RemIdx == 0)
8093 return Op;
8094
8095 // Else we must shift our vector register directly to extract the subvector.
8096 // Do this using VSLIDEDOWN.
8097
8098 // If the vector type is an LMUL-group type, extract a subvector equal to the
8099 // nearest full vector register type. This should resolve to an EXTRACT_SUBREG
8100 // instruction.
8101 MVT InterSubVT = VecVT;
8102 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
8103 InterSubVT = getLMUL1VT(VecVT);
8104 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
8105 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
8106 }
8107
8108 // Slide this vector register down by the desired number of elements in order
8109 // to place the desired subvector starting at element 0.
8110 SDValue SlidedownAmt =
8111 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
8112
8113 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
8114 SDValue Slidedown =
8115 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
8116 Vec, SlidedownAmt, Mask, VL);
8117
8118 // Now the vector is in the right position, extract our final subvector. This
8119 // should resolve to a COPY.
8120 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
8121 DAG.getConstant(0, DL, XLenVT));
8122
8123 // We might have bitcast from a mask type: cast back to the original type if
8124 // required.
8125 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
8126}
8127
8128// Widen a vector's operands to i8, then truncate its results back to the
8129// original type, typically i1. All operand and result types must be the same.
8130 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
8131 SelectionDAG &DAG) {
8132 MVT VT = N.getSimpleValueType();
8133 MVT WideVT = VT.changeVectorElementType(MVT::i8);
8134 SmallVector<SDValue, 4> WideOps;
8135 for (SDValue Op : N->ops()) {
8136 assert(Op.getSimpleValueType() == VT &&
8137 "Operands and result must be same type");
8138 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
8139 }
8140
8141 unsigned NumVals = N->getNumValues();
8142
8143 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
8144 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
8145 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
8146 SmallVector<SDValue, 4> TruncVals;
8147 for (unsigned I = 0; I < NumVals; I++) {
8148 TruncVals.push_back(
8149 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
8150 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
8151 }
8152
8153 if (TruncVals.size() > 1)
8154 return DAG.getMergeValues(TruncVals, DL);
8155 return TruncVals.front();
8156}
8157
8158SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
8159 SelectionDAG &DAG) const {
8160 SDLoc DL(Op);
8161 MVT VecVT = Op.getSimpleValueType();
8162 MVT XLenVT = Subtarget.getXLenVT();
8163
8164 assert(VecVT.isScalableVector() &&
8165 "vector_deinterleave on non-scalable vector!");
8166
8167 // 1 bit element vectors need to be widened to e8
8168 if (VecVT.getVectorElementType() == MVT::i1)
8169 return widenVectorOpsToi8(Op, DL, DAG);
8170
8171 // If the VT is LMUL=8, we need to split and reassemble.
8172 if (VecVT.getSizeInBits().getKnownMinValue() ==
8173 (8 * RISCV::RVVBitsPerBlock)) {
8174 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
8175 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
8176 EVT SplitVT = Op0Lo.getValueType();
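// Deinterleave each operand separately; concatenating the even (resp. odd)
// results of the two operands gives the even (resp. odd) elements of the
// concatenation of the original operands.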
8177
8178 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
8179 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
8180 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
8181 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
8182
8183 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
8184 ResLo.getValue(0), ResHi.getValue(0));
8185 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
8186 ResHi.getValue(1));
8187 return DAG.getMergeValues({Even, Odd}, DL);
8188 }
8189
8190 // Concatenate the two vectors as one vector to deinterleave
8191 MVT ConcatVT =
8192 MVT::getVectorVT(VecVT.getVectorElementType(),
8193 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
8194 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
8195 Op.getOperand(0), Op.getOperand(1));
8196
8197 // We want to operate on all lanes, so get the mask and VL for it
8198 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
8199 SDValue Passthru = DAG.getUNDEF(ConcatVT);
8200
8201 // We can deinterleave through vnsrl.wi if the element type is smaller than
8202 // ELEN
8203 if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
8204 SDValue Even =
8205 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
8206 SDValue Odd =
8207 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
8208 return DAG.getMergeValues({Even, Odd}, DL);
8209 }
8210
8211 // For the indices, use the same SEW to avoid an extra vsetvli
8212 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
8213 // Create a vector of even indices {0, 2, 4, ...}
8214 SDValue EvenIdx =
8215 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
8216 // Create a vector of odd indices {1, 3, 5, ... }
8217 SDValue OddIdx =
8218 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
8219
8220 // Gather the even and odd elements into two separate vectors
8221 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
8222 Concat, EvenIdx, Passthru, Mask, VL);
8223 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
8224 Concat, OddIdx, Passthru, Mask, VL);
8225
8226 // Extract the result half of the gather for even and odd
8227 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
8228 DAG.getConstant(0, DL, XLenVT));
8229 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
8230 DAG.getConstant(0, DL, XLenVT));
8231
8232 return DAG.getMergeValues({Even, Odd}, DL);
8233}
8234
8235SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
8236 SelectionDAG &DAG) const {
8237 SDLoc DL(Op);
8238 MVT VecVT = Op.getSimpleValueType();
8239
8240 assert(VecVT.isScalableVector() &&
8241 "vector_interleave on non-scalable vector!");
8242
8243 // i1 vectors need to be widened to i8
8244 if (VecVT.getVectorElementType() == MVT::i1)
8245 return widenVectorOpsToi8(Op, DL, DAG);
8246
8247 MVT XLenVT = Subtarget.getXLenVT();
8248 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
8249
8250 // If the VT is LMUL=8, we need to split and reassemble.
8251 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
8252 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
8253 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
8254 EVT SplitVT = Op0Lo.getValueType();
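// Interleaving the low halves of the two operands yields the low half of the
// result, and interleaving the high halves yields the high half.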
8255
8256 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
8257 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
8258 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
8259 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
8260
8261 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
8262 ResLo.getValue(0), ResLo.getValue(1));
8263 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
8264 ResHi.getValue(0), ResHi.getValue(1));
8265 return DAG.getMergeValues({Lo, Hi}, DL);
8266 }
8267
8268 SDValue Interleaved;
8269
8270 // If the element type is smaller than ELEN, then we can interleave with
8271 // vwaddu.vv and vwmaccu.vx
8272 if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
8273 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
8274 DAG, Subtarget);
8275 } else {
8276 // Otherwise, fall back to using vrgatherei16.vv
8277 MVT ConcatVT =
8278 MVT::getVectorVT(VecVT.getVectorElementType(),
8279 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
8280 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
8281 Op.getOperand(0), Op.getOperand(1));
8282
8283 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
8284
8285 // 0 1 2 3 4 5 6 7 ...
8286 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
8287
8288 // 1 1 1 1 1 1 1 1 ...
8289 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
8290
8291 // 1 0 1 0 1 0 1 0 ...
8292 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
8293 OddMask = DAG.getSetCC(
8294 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
8295 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
8296 ISD::CondCode::SETNE);
8297
8298 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
8299
8300 // Build up the index vector for interleaving the concatenated vector
8301 // 0 0 1 1 2 2 3 3 ...
8302 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
8303 // 0 n 1 n+1 2 n+2 3 n+3 ...
8304 Idx =
8305 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
8306
8307 // Then perform the interleave
8308 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
8309 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
8310 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
8311 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
8312 }
8313
8314 // Extract the two halves from the interleaved result
8315 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
8316 DAG.getVectorIdxConstant(0, DL));
8317 SDValue Hi = DAG.getNode(
8318 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
8319 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
8320
8321 return DAG.getMergeValues({Lo, Hi}, DL);
8322}
8323
8324 // Lower step_vector to the vid instruction. Any non-identity step value must
8325 // be accounted for by manual expansion.
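// For example, a step_vector with a constant step of 4 becomes a vid.v
// followed by a left shift of each element by 2.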
8326SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
8327 SelectionDAG &DAG) const {
8328 SDLoc DL(Op);
8329 MVT VT = Op.getSimpleValueType();
8330 assert(VT.isScalableVector() && "Expected scalable vector");
8331 MVT XLenVT = Subtarget.getXLenVT();
8332 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
8333 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
8334 uint64_t StepValImm = Op.getConstantOperandVal(0);
8335 if (StepValImm != 1) {
8336 if (isPowerOf2_64(StepValImm)) {
8337 SDValue StepVal =
8338 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8339 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
8340 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
8341 } else {
8342 SDValue StepVal = lowerScalarSplat(
8343 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
8344 VL, VT, DL, DAG, Subtarget);
8345 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
8346 }
8347 }
8348 return StepVec;
8349}
8350
8351// Implement vector_reverse using vrgather.vv with indices determined by
8352// subtracting the id of each element from (VLMAX-1). This will convert
8353// the indices like so:
8354// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
8355// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
8356SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
8357 SelectionDAG &DAG) const {
8358 SDLoc DL(Op);
8359 MVT VecVT = Op.getSimpleValueType();
8360 if (VecVT.getVectorElementType() == MVT::i1) {
8361 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8362 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
8363 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
8364 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
8365 }
8366 unsigned EltSize = VecVT.getScalarSizeInBits();
8367 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
8368 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
8369 unsigned MaxVLMAX =
8370 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
8371
8372 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
8373 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
8374
8375 // If this is SEW=8 and VLMAX is potentially more than 256, we need
8376 // to use vrgatherei16.vv.
8377 // TODO: It's also possible to use vrgatherei16.vv for other types to
8378 // decrease register width for the index calculation.
8379 if (MaxVLMAX > 256 && EltSize == 8) {
8380 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
8381 // Reverse each half, then reassemble them in reverse order.
8382 // NOTE: It's also possible that after splitting, VLMAX no longer
8383 // requires vrgatherei16.vv.
8384 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
8385 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
8386 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
8387 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
8388 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
8389 // Reassemble the low and high pieces reversed.
8390 // FIXME: This is a CONCAT_VECTORS.
8391 SDValue Res =
8392 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
8393 DAG.getIntPtrConstant(0, DL));
8394 return DAG.getNode(
8395 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
8396 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
8397 }
8398
8399 // Just promote the int type to i16 which will double the LMUL.
8400 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
8401 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
8402 }
8403
8404 MVT XLenVT = Subtarget.getXLenVT();
8405 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
8406
8407 // Calculate VLMAX-1 for the desired SEW.
8408 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
8409 computeVLMax(VecVT, DL, DAG),
8410 DAG.getConstant(1, DL, XLenVT));
8411
8412 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
8413 bool IsRV32E64 =
8414 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
8415 SDValue SplatVL;
8416 if (!IsRV32E64)
8417 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
8418 else
8419 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
8420 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
8421
8422 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
8423 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
8424 DAG.getUNDEF(IntVT), Mask, VL);
8425
8426 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
8427 DAG.getUNDEF(VecVT), Mask, VL);
8428}
8429
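// Lower VECTOR_SPLICE by sliding the first vector down by the splice offset
// and then sliding the second vector up into the elements that were vacated.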
8430SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
8431 SelectionDAG &DAG) const {
8432 SDLoc DL(Op);
8433 SDValue V1 = Op.getOperand(0);
8434 SDValue V2 = Op.getOperand(1);
8435 MVT XLenVT = Subtarget.getXLenVT();
8436 MVT VecVT = Op.getSimpleValueType();
8437
8438 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
8439
8440 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
8441 SDValue DownOffset, UpOffset;
8442 if (ImmValue >= 0) {
8443 // The operand is a TargetConstant; we need to rebuild it as a regular
8444 // constant.
8445 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
8446 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
8447 } else {
8448 // The operand is a TargetConstant; we need to rebuild it as a regular
8449 // constant rather than negating the original operand.
8450 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
8451 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
8452 }
8453
8454 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
8455
8456 SDValue SlideDown =
8457 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
8458 DownOffset, TrueMask, UpOffset);
8459 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
8460 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
8461 RISCVII::TAIL_AGNOSTIC);
8462}
8463
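// Lower a fixed-length vector load to a unit-stride load (vle/vlm intrinsic)
// of the scalable container type, with VL limited to the number of elements,
// then convert the result back to the original fixed-length type.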
8464SDValue
8465RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
8466 SelectionDAG &DAG) const {
8467 SDLoc DL(Op);
8468 auto *Load = cast<LoadSDNode>(Op);
8469
8470 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
8471 Load->getMemoryVT(),
8472 *Load->getMemOperand()) &&
8473 "Expecting a correctly-aligned load");
8474
8475 MVT VT = Op.getSimpleValueType();
8476 MVT XLenVT = Subtarget.getXLenVT();
8477 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8478
8479 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8480
8481 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
8482 SDValue IntID = DAG.getTargetConstant(
8483 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
8484 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
8485 if (!IsMaskOp)
8486 Ops.push_back(DAG.getUNDEF(ContainerVT));
8487 Ops.push_back(Load->getBasePtr());
8488 Ops.push_back(VL);
8489 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8490 SDValue NewLoad =
8491 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8492 Load->getMemoryVT(), Load->getMemOperand());
8493
8494 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
8495 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
8496}
8497
8498SDValue
8499RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
8500 SelectionDAG &DAG) const {
8501 SDLoc DL(Op);
8502 auto *Store = cast<StoreSDNode>(Op);
8503
8504 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
8505 Store->getMemoryVT(),
8506 *Store->getMemOperand()) &&
8507 "Expecting a correctly-aligned store");
8508
8509 SDValue StoreVal = Store->getValue();
8510 MVT VT = StoreVal.getSimpleValueType();
8511 MVT XLenVT = Subtarget.getXLenVT();
8512
8513 // If the size is less than a byte, we need to pad with zeros to make a byte.
8514 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
8515 VT = MVT::v8i1;
8516 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
8517 DAG.getConstant(0, DL, VT), StoreVal,
8518 DAG.getIntPtrConstant(0, DL));
8519 }
8520
8521 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8522
8523 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8524
8525 SDValue NewValue =
8526 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
8527
8528 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
8529 SDValue IntID = DAG.getTargetConstant(
8530 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
8531 return DAG.getMemIntrinsicNode(
8532 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
8533 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
8534 Store->getMemoryVT(), Store->getMemOperand());
8535}
8536
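// Lower MLOAD and VP_LOAD to a vle_mask intrinsic, or to a plain vle when the
// mask is known to be all ones.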
8537SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
8538 SelectionDAG &DAG) const {
8539 SDLoc DL(Op);
8540 MVT VT = Op.getSimpleValueType();
8541
8542 const auto *MemSD = cast<MemSDNode>(Op);
8543 EVT MemVT = MemSD->getMemoryVT();
8544 MachineMemOperand *MMO = MemSD->getMemOperand();
8545 SDValue Chain = MemSD->getChain();
8546 SDValue BasePtr = MemSD->getBasePtr();
8547
8548 SDValue Mask, PassThru, VL;
8549 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
8550 Mask = VPLoad->getMask();
8551 PassThru = DAG.getUNDEF(VT);
8552 VL = VPLoad->getVectorLength();
8553 } else {
8554 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
8555 Mask = MLoad->getMask();
8556 PassThru = MLoad->getPassThru();
8557 }
8558
8559 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8560
8561 MVT XLenVT = Subtarget.getXLenVT();
8562
8563 MVT ContainerVT = VT;
8564 if (VT.isFixedLengthVector()) {
8565 ContainerVT = getContainerForFixedLengthVector(VT);
8566 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
8567 if (!IsUnmasked) {
8568 MVT MaskVT = getMaskTypeFor(ContainerVT);
8569 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8570 }
8571 }
8572
8573 if (!VL)
8574 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8575
8576 unsigned IntID =
8577 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
8578 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
8579 if (IsUnmasked)
8580 Ops.push_back(DAG.getUNDEF(ContainerVT));
8581 else
8582 Ops.push_back(PassThru);
8583 Ops.push_back(BasePtr);
8584 if (!IsUnmasked)
8585 Ops.push_back(Mask);
8586 Ops.push_back(VL);
8587 if (!IsUnmasked)
8588 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
8589
8590 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8591
8592 SDValue Result =
8593 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
8594 Chain = Result.getValue(1);
8595
8596 if (VT.isFixedLengthVector())
8597 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8598
8599 return DAG.getMergeValues({Result, Chain}, DL);
8600}
8601
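// Lower MSTORE and VP_STORE to a vse_mask intrinsic, or to a plain vse when
// the mask is known to be all ones.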
8602SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
8603 SelectionDAG &DAG) const {
8604 SDLoc DL(Op);
8605
8606 const auto *MemSD = cast<MemSDNode>(Op);
8607 EVT MemVT = MemSD->getMemoryVT();
8608 MachineMemOperand *MMO = MemSD->getMemOperand();
8609 SDValue Chain = MemSD->getChain();
8610 SDValue BasePtr = MemSD->getBasePtr();
8611 SDValue Val, Mask, VL;
8612
8613 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
8614 Val = VPStore->getValue();
8615 Mask = VPStore->getMask();
8616 VL = VPStore->getVectorLength();
8617 } else {
8618 const auto *MStore = cast<MaskedStoreSDNode>(Op);
8619 Val = MStore->getValue();
8620 Mask = MStore->getMask();
8621 }
8622
8623 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8624
8625 MVT VT = Val.getSimpleValueType();
8626 MVT XLenVT = Subtarget.getXLenVT();
8627
8628 MVT ContainerVT = VT;
8629 if (VT.isFixedLengthVector()) {
8630 ContainerVT = getContainerForFixedLengthVector(VT);
8631
8632 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
8633 if (!IsUnmasked) {
8634 MVT MaskVT = getMaskTypeFor(ContainerVT);
8635 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8636 }
8637 }
8638
8639 if (!VL)
8640 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8641
8642 unsigned IntID =
8643 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
8644 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
8645 Ops.push_back(Val);
8646 Ops.push_back(BasePtr);
8647 if (!IsUnmasked)
8648 Ops.push_back(Mask);
8649 Ops.push_back(VL);
8650
8651 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
8652 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
8653}
8654
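// Lower a fixed-length vector SETCC by comparing the operands as scalable
// container vectors and converting the resulting mask back to the
// fixed-length result type.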
8655SDValue
8656RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
8657 SelectionDAG &DAG) const {
8658 MVT InVT = Op.getOperand(0).getSimpleValueType();
8659 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
8660
8661 MVT VT = Op.getSimpleValueType();
8662
8663 SDValue Op1 =
8664 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8665 SDValue Op2 =
8666 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
8667
8668 SDLoc DL(Op);
8669 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
8670 DAG, Subtarget);
8671 MVT MaskVT = getMaskTypeFor(ContainerVT);
8672
8673 SDValue Cmp =
8674 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
8675 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
8676
8677 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
8678}
8679
8680SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
8681 SelectionDAG &DAG) const {
8682 unsigned Opc = Op.getOpcode();
8683 SDLoc DL(Op);
8684 SDValue Chain = Op.getOperand(0);
8685 SDValue Op1 = Op.getOperand(1);
8686 SDValue Op2 = Op.getOperand(2);
8687 SDValue CC = Op.getOperand(3);
8688 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
8689 MVT VT = Op.getSimpleValueType();
8690 MVT InVT = Op1.getSimpleValueType();
8691
8692 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
8693 // condition codes.
8694 if (Opc == ISD::STRICT_FSETCCS) {
8695 // Expand strict_fsetccs(x, oeq) to
8696 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
8697 SDVTList VTList = Op->getVTList();
8698 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
8699 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
8700 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
8701 Op2, OLECCVal);
8702 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
8703 Op1, OLECCVal);
8704 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
8705 Tmp1.getValue(1), Tmp2.getValue(1));
8706 // Tmp1 and Tmp2 might be the same node.
8707 if (Tmp1 != Tmp2)
8708 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
8709 return DAG.getMergeValues({Tmp1, OutChain}, DL);
8710 }
8711
8712 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
8713 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
8714 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
8715 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
8716 Op2, OEQCCVal);
8717 SDValue Res = DAG.getNOT(DL, OEQ, VT);
8718 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
8719 }
8720 }
8721
8722 MVT ContainerInVT = InVT;
8723 if (InVT.isFixedLengthVector()) {
8724 ContainerInVT = getContainerForFixedLengthVector(InVT);
8725 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
8726 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
8727 }
8728 MVT MaskVT = getMaskTypeFor(ContainerInVT);
8729
8730 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
8731
8732 SDValue Res;
8733 if (Opc == ISD::STRICT_FSETCC &&
8734 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
8735 CCVal == ISD::SETOLE)) {
8736 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
8737 // is only active when both input elements are ordered.
8738 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
8739 SDValue OrderMask1 = DAG.getNode(
8740 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
8741 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
8742 True, VL});
8743 SDValue OrderMask2 = DAG.getNode(
8744 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
8745 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
8746 True, VL});
8747 Mask =
8748 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
8749 // Use Mask as the merge operand to let the result be 0 if either of the
8750 // inputs is unordered.
8751 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
8752 DAG.getVTList(MaskVT, MVT::Other),
8753 {Chain, Op1, Op2, CC, Mask, Mask, VL});
8754 } else {
8755 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
8756 : RISCVISD::STRICT_FSETCCS_VL;
8757 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
8758 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
8759 }
8760
8761 if (VT.isFixedLengthVector()) {
8762 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8763 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8764 }
8765 return Res;
8766}
8767
8768// Lower vector ABS to smax(X, sub(0, X)).
8769SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
8770 SDLoc DL(Op);
8771 MVT VT = Op.getSimpleValueType();
8772 SDValue X = Op.getOperand(0);
8773
8774 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
8775 "Unexpected type for ISD::ABS");
8776
8777 MVT ContainerVT = VT;
8778 if (VT.isFixedLengthVector()) {
8779 ContainerVT = getContainerForFixedLengthVector(VT);
8780 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
8781 }
8782
8783 SDValue Mask, VL;
8784 if (Op->getOpcode() == ISD::VP_ABS) {
8785 Mask = Op->getOperand(1);
8786 if (VT.isFixedLengthVector())
8787 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
8788 Subtarget);
8789 VL = Op->getOperand(2);
8790 } else
8791 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8792
8793 SDValue SplatZero = DAG.getNode(
8794 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
8795 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
8796 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
8797 DAG.getUNDEF(ContainerVT), Mask, VL);
8798 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
8799 DAG.getUNDEF(ContainerVT), Mask, VL);
8800
8801 if (VT.isFixedLengthVector())
8802 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
8803 return Max;
8804}
8805
8806SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
8807 SDValue Op, SelectionDAG &DAG) const {
8808 SDLoc DL(Op);
8809 MVT VT = Op.getSimpleValueType();
8810 SDValue Mag = Op.getOperand(0);
8811 SDValue Sign = Op.getOperand(1);
8812 assert(Mag.getValueType() == Sign.getValueType() &&
8813 "Can only handle COPYSIGN with matching types.");
8814
8815 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8816 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
8817 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
8818
8819 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8820
8821 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
8822 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
8823
8824 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
8825}
8826
8827SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
8828 SDValue Op, SelectionDAG &DAG) const {
8829 MVT VT = Op.getSimpleValueType();
8830 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8831
8832 MVT I1ContainerVT =
8833 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8834
8835 SDValue CC =
8836 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
8837 SDValue Op1 =
8838 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
8839 SDValue Op2 =
8840 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
8841
8842 SDLoc DL(Op);
8843 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8844
8845 SDValue Select =
8846 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
8847
8848 return convertFromScalableVector(VT, Select, DAG, Subtarget);
8849}
8850
8851SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
8852 SelectionDAG &DAG) const {
8853 unsigned NewOpc = getRISCVVLOp(Op);
8854 bool HasMergeOp = hasMergeOp(NewOpc);
8855 bool HasMask = hasMaskOp(NewOpc);
8856
8857 MVT VT = Op.getSimpleValueType();
8858 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8859
8860 // Create list of operands by converting existing ones to scalable types.
8861 SmallVector<SDValue, 6> Ops;
8862 for (const SDValue &V : Op->op_values()) {
8863 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
8864
8865 // Pass through non-vector operands.
8866 if (!V.getValueType().isVector()) {
8867 Ops.push_back(V);
8868 continue;
8869 }
8870
8871 // "cast" fixed length vector to a scalable vector.
8872 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
8873 "Only fixed length vectors are supported!");
8874 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
8875 }
8876
8877 SDLoc DL(Op);
8878 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8879 if (HasMergeOp)
8880 Ops.push_back(DAG.getUNDEF(ContainerVT));
8881 if (HasMask)
8882 Ops.push_back(Mask);
8883 Ops.push_back(VL);
8884
8885 // StrictFP operations have two result values. Their lowered result should
8886 // have the same result count.
8887 if (Op->isStrictFPOpcode()) {
8888 SDValue ScalableRes =
8889 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
8890 Op->getFlags());
8891 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
8892 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
8893 }
8894
8895 SDValue ScalableRes =
8896 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
8897 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
8898}
8899
8900// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
8901// * Operands of each node are assumed to be in the same order.
8902// * The EVL operand is promoted from i32 to i64 on RV64.
8903// * Fixed-length vectors are converted to their scalable-vector container
8904// types.
8905SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
8906 unsigned RISCVISDOpc,
8907 bool HasMergeOp) const {
8908 SDLoc DL(Op);
8909 MVT VT = Op.getSimpleValueType();
8910 SmallVector<SDValue, 4> Ops;
8911
8912 MVT ContainerVT = VT;
8913 if (VT.isFixedLengthVector())
8914 ContainerVT = getContainerForFixedLengthVector(VT);
8915
8916 for (const auto &OpIdx : enumerate(Op->ops())) {
8917 SDValue V = OpIdx.value();
8918 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
8919 // Add dummy merge value before the mask.
8920 if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
8921 Ops.push_back(DAG.getUNDEF(ContainerVT));
8922 // Pass through operands which aren't fixed-length vectors.
8923 if (!V.getValueType().isFixedLengthVector()) {
8924 Ops.push_back(V);
8925 continue;
8926 }
8927 // "cast" fixed length vector to a scalable vector.
8928 MVT OpVT = V.getSimpleValueType();
8929 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
8930 assert(useRVVForFixedLengthVectorVT(OpVT) &&
8931 "Only fixed length vectors are supported!");
8932 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
8933 }
8934
8935 if (!VT.isFixedLengthVector())
8936 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
8937
8938 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
8939
8940 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
8941}
8942
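// Lower VP_SIGN_EXTEND/VP_ZERO_EXTEND from a mask vector by using the source
// mask to select between a splat of 1 (or -1 for sign extension) and a splat
// of 0.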
8943SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
8944 SelectionDAG &DAG) const {
8945 SDLoc DL(Op);
8946 MVT VT = Op.getSimpleValueType();
8947
8948 SDValue Src = Op.getOperand(0);
8949 // NOTE: Mask is dropped.
8950 SDValue VL = Op.getOperand(2);
8951
8952 MVT ContainerVT = VT;
8953 if (VT.isFixedLengthVector()) {
8954 ContainerVT = getContainerForFixedLengthVector(VT);
8955 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8956 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
8957 }
8958
8959 MVT XLenVT = Subtarget.getXLenVT();
8960 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8961 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8962 DAG.getUNDEF(ContainerVT), Zero, VL);
8963
8964 SDValue SplatValue = DAG.getConstant(
8965 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
8966 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8967 DAG.getUNDEF(ContainerVT), SplatValue, VL);
8968
8969 SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
8970 Splat, ZeroSplat, VL);
8971 if (!VT.isFixedLengthVector())
8972 return Result;
8973 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8974}
8975
8976SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
8977 SelectionDAG &DAG) const {
8978 SDLoc DL(Op);
8979 MVT VT = Op.getSimpleValueType();
8980
8981 SDValue Op1 = Op.getOperand(0);
8982 SDValue Op2 = Op.getOperand(1);
8983 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8984 // NOTE: Mask is dropped.
8985 SDValue VL = Op.getOperand(4);
8986
8987 MVT ContainerVT = VT;
8988 if (VT.isFixedLengthVector()) {
8989 ContainerVT = getContainerForFixedLengthVector(VT);
8990 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
8991 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
8992 }
8993
8994 SDValue Result;
8995 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
8996
8997 switch (Condition) {
8998 default:
8999 break;
9000 // X != Y --> (X^Y)
9001 case ISD::SETNE:
9002 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
9003 break;
9004 // X == Y --> ~(X^Y)
9005 case ISD::SETEQ: {
9006 SDValue Temp =
9007 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
9008 Result =
9009 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
9010 break;
9011 }
9012 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
9013 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
9014 case ISD::SETGT:
9015 case ISD::SETULT: {
9016 SDValue Temp =
9017 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
9018 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
9019 break;
9020 }
9021 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
9022 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
9023 case ISD::SETLT:
9024 case ISD::SETUGT: {
9025 SDValue Temp =
9026 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
9027 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
9028 break;
9029 }
9030 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
9031 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
9032 case ISD::SETGE:
9033 case ISD::SETULE: {
9034 SDValue Temp =
9035 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
9036 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
9037 break;
9038 }
9039 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
9040 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
9041 case ISD::SETLE:
9042 case ISD::SETUGE: {
9043 SDValue Temp =
9044 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
9045 Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
9046 break;
9047 }
9048 }
9049
9050 if (!VT.isFixedLengthVector())
9051 return Result;
9052 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9053}
9054
9055// Lower Floating-Point/Integer Type-Convert VP SDNodes
9056SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
9057 unsigned RISCVISDOpc) const {
9058 SDLoc DL(Op);
9059
9060 SDValue Src = Op.getOperand(0);
9061 SDValue Mask = Op.getOperand(1);
9062 SDValue VL = Op.getOperand(2);
9063
9064 MVT DstVT = Op.getSimpleValueType();
9065 MVT SrcVT = Src.getSimpleValueType();
9066 if (DstVT.isFixedLengthVector()) {
9067 DstVT = getContainerForFixedLengthVector(DstVT);
9068 SrcVT = getContainerForFixedLengthVector(SrcVT);
9069 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
9070 MVT MaskVT = getMaskTypeFor(DstVT);
9071 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9072 }
9073
9074 unsigned DstEltSize = DstVT.getScalarSizeInBits();
9075 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
9076
9077 SDValue Result;
9078 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
9079 if (SrcVT.isInteger()) {
9080 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
9081
9082 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
9083 ? RISCVISD::VSEXT_VL
9084 : RISCVISD::VZEXT_VL;
9085
9086 // Do we need to do any pre-widening before converting?
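// An i1 source is first expanded to 0 and 1 (or -1 for signed conversions)
// using a vselect, so that the FP conversion sees real integer elements.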
9087 if (SrcEltSize == 1) {
9088 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
9089 MVT XLenVT = Subtarget.getXLenVT();
9090 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9091 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
9092 DAG.getUNDEF(IntVT), Zero, VL);
9093 SDValue One = DAG.getConstant(
9094 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
9095 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
9096 DAG.getUNDEF(IntVT), One, VL);
9097 Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
9098 ZeroSplat, VL);
9099 } else if (DstEltSize > (2 * SrcEltSize)) {
9100 // Widen before converting.
9101 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
9102 DstVT.getVectorElementCount());
9103 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
9104 }
9105
9106 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
9107 } else {
9108 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
9109 "Wrong input/output vector types");
9110
9111 // Convert f16 to f32 then convert f32 to i64.
9112 if (DstEltSize > (2 * SrcEltSize)) {
9113 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
9114 MVT InterimFVT =
9115 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
9116 Src =
9117 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
9118 }
9119
9120 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
9121 }
9122 } else { // Narrowing + Conversion
9123 if (SrcVT.isInteger()) {
9124 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
9125 // First do a narrowing conversion to an FP type half the size, then round
9126 // the FP type to a smaller FP type if needed.
9127
9128 MVT InterimFVT = DstVT;
9129 if (SrcEltSize > (2 * DstEltSize)) {
9130 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
9131 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
9132 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
9133 }
9134
9135 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
9136
9137 if (InterimFVT != DstVT) {
9138 Src = Result;
9139 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
9140 }
9141 } else {
9142 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
9143 "Wrong input/output vector types");
9144 // First do a narrowing conversion to an integer half the size, then
9145 // truncate if needed.
9146
9147 if (DstEltSize == 1) {
9148 // First convert to the same size integer, then convert to mask using
9149 // setcc.
9150 assert(SrcEltSize >= 16 && "Unexpected FP type!");
9151 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
9152 DstVT.getVectorElementCount());
9153 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
9154
9155 // Compare the integer result to 0. The integer should be 0 or 1/-1,
9156 // otherwise the conversion was undefined.
9157 MVT XLenVT = Subtarget.getXLenVT();
9158 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9159 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
9160 DAG.getUNDEF(InterimIVT), SplatZero, VL);
9161 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
9162 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
9163 DAG.getUNDEF(DstVT), Mask, VL});
9164 } else {
9165 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
9166 DstVT.getVectorElementCount());
9167
9168 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
9169
9170 while (InterimIVT != DstVT) {
9171 SrcEltSize /= 2;
9172 Src = Result;
9173 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
9174 DstVT.getVectorElementCount());
9175 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
9176 Src, Mask, VL);
9177 }
9178 }
9179 }
9180 }
9181
9182 MVT VT = Op.getSimpleValueType();
9183 if (!VT.isFixedLengthVector())
9184 return Result;
9185 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9186}
9187
9188SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
9189 unsigned MaskOpc,
9190 unsigned VecOpc) const {
9191 MVT VT = Op.getSimpleValueType();
9192 if (VT.getVectorElementType() != MVT::i1)
9193 return lowerVPOp(Op, DAG, VecOpc, true);
9194
9195 // It is safe to drop mask parameter as masked-off elements are undef.
9196 SDValue Op1 = Op->getOperand(0);
9197 SDValue Op2 = Op->getOperand(1);
9198 SDValue VL = Op->getOperand(3);
9199
9200 MVT ContainerVT = VT;
9201 const bool IsFixed = VT.isFixedLengthVector();
9202 if (IsFixed) {
9203 ContainerVT = getContainerForFixedLengthVector(VT);
9204 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
9205 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
9206 }
9207
9208 SDLoc DL(Op);
9209 SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
9210 if (!IsFixed)
9211 return Val;
9212 return convertFromScalableVector(VT, Val, DAG, Subtarget);
9213}
9214
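// Lower VP_STRIDED_LOAD to a riscv_vlse/riscv_vlse_mask intrinsic on the
// scalable container type.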
9215SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
9216 SelectionDAG &DAG) const {
9217 SDLoc DL(Op);
9218 MVT XLenVT = Subtarget.getXLenVT();
9219 MVT VT = Op.getSimpleValueType();
9220 MVT ContainerVT = VT;
9221 if (VT.isFixedLengthVector())
9222 ContainerVT = getContainerForFixedLengthVector(VT);
9223
9224 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9225
9226 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
9227 // Check if the mask is known to be all ones
9228 SDValue Mask = VPNode->getMask();
9229 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9230
9231 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
9232 : Intrinsic::riscv_vlse_mask,
9233 DL, XLenVT);
9234 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
9235 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
9236 VPNode->getStride()};
9237 if (!IsUnmasked) {
9238 if (VT.isFixedLengthVector()) {
9239 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
9240 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9241 }
9242 Ops.push_back(Mask);
9243 }
9244 Ops.push_back(VPNode->getVectorLength());
9245 if (!IsUnmasked) {
9246 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9247 Ops.push_back(Policy);
9248 }
9249
9250 SDValue Result =
9251 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9252 VPNode->getMemoryVT(), VPNode->getMemOperand());
9253 SDValue Chain = Result.getValue(1);
9254
9255 if (VT.isFixedLengthVector())
9256 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9257
9258 return DAG.getMergeValues({Result, Chain}, DL);
9259}
9260
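// Lower VP_STRIDED_STORE to a riscv_vsse/riscv_vsse_mask intrinsic.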
9261SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
9262 SelectionDAG &DAG) const {
9263 SDLoc DL(Op);
9264 MVT XLenVT = Subtarget.getXLenVT();
9265
9266 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
9267 SDValue StoreVal = VPNode->getValue();
9268 MVT VT = StoreVal.getSimpleValueType();
9269 MVT ContainerVT = VT;
9270 if (VT.isFixedLengthVector()) {
9271 ContainerVT = getContainerForFixedLengthVector(VT);
9272 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
9273 }
9274
9275 // Check if the mask is known to be all ones
9276 SDValue Mask = VPNode->getMask();
9277 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9278
9279 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
9280 : Intrinsic::riscv_vsse_mask,
9281 DL, XLenVT);
9282 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
9283 VPNode->getBasePtr(), VPNode->getStride()};
9284 if (!IsUnmasked) {
9285 if (VT.isFixedLengthVector()) {
9286 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
9287 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9288 }
9289 Ops.push_back(Mask);
9290 }
9291 Ops.push_back(VPNode->getVectorLength());
9292
9293 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
9294 Ops, VPNode->getMemoryVT(),
9295 VPNode->getMemOperand());
9296}
9297
9298// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
9299// matched to a RVV indexed load. The RVV indexed load instructions only
9300// support the "unsigned unscaled" addressing mode; indices are implicitly
9301// zero-extended or truncated to XLEN and are treated as byte offsets. Any
9302// signed or scaled indexing is extended to the XLEN value type and scaled
9303// accordingly.
9304SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
9305 SelectionDAG &DAG) const {
9306 SDLoc DL(Op);
9307 MVT VT = Op.getSimpleValueType();
9308
9309 const auto *MemSD = cast<MemSDNode>(Op.getNode());
9310 EVT MemVT = MemSD->getMemoryVT();
9311 MachineMemOperand *MMO = MemSD->getMemOperand();
9312 SDValue Chain = MemSD->getChain();
9313 SDValue BasePtr = MemSD->getBasePtr();
9314
9315 ISD::LoadExtType LoadExtType;
9316 SDValue Index, Mask, PassThru, VL;
9317
9318 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
9319 Index = VPGN->getIndex();
9320 Mask = VPGN->getMask();
9321 PassThru = DAG.getUNDEF(VT);
9322 VL = VPGN->getVectorLength();
9323 // VP doesn't support extending loads.
9324 LoadExtType = ISD::NON_EXTLOAD;
9325 } else {
9326 // Else it must be a MGATHER.
9327 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
9328 Index = MGN->getIndex();
9329 Mask = MGN->getMask();
9330 PassThru = MGN->getPassThru();
9331 LoadExtType = MGN->getExtensionType();
9332 }
9333
9334 MVT IndexVT = Index.getSimpleValueType();
9335 MVT XLenVT = Subtarget.getXLenVT();
9336
9337 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
9338 "Unexpected VTs!");
9339 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
9340 // Targets have to explicitly opt-in for extending vector loads.
9341 assert(LoadExtType == ISD::NON_EXTLOAD &&
9342 "Unexpected extending MGATHER/VP_GATHER");
9343 (void)LoadExtType;
9344
9345 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9346 // the selection of the masked intrinsics doesn't do this for us.
9347 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9348
9349 MVT ContainerVT = VT;
9350 if (VT.isFixedLengthVector()) {
9351 ContainerVT = getContainerForFixedLengthVector(VT);
9352 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
9353 ContainerVT.getVectorElementCount());
9354
9355 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
9356
9357 if (!IsUnmasked) {
9358 MVT MaskVT = getMaskTypeFor(ContainerVT);
9359 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9360 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9361 }
9362 }
9363
9364 if (!VL)
9365 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9366
9367 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
9368 IndexVT = IndexVT.changeVectorElementType(XLenVT);
9369 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
9370 VL);
9371 Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
9372 TrueMask, VL);
9373 }
9374
9375 unsigned IntID =
9376 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
9377 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9378 if (IsUnmasked)
9379 Ops.push_back(DAG.getUNDEF(ContainerVT));
9380 else
9381 Ops.push_back(PassThru);
9382 Ops.push_back(BasePtr);
9383 Ops.push_back(Index);
9384 if (!IsUnmasked)
9385 Ops.push_back(Mask);
9386 Ops.push_back(VL);
9387 if (!IsUnmasked)
9388 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
9389
9390 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9391 SDValue Result =
9392 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
9393 Chain = Result.getValue(1);
9394
9395 if (VT.isFixedLengthVector())
9396 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9397
9398 return DAG.getMergeValues({Result, Chain}, DL);
9399}
9400
9401// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
9402// matched to a RVV indexed store. The RVV indexed store instructions only
9403// support the "unsigned unscaled" addressing mode; indices are implicitly
9404// zero-extended or truncated to XLEN and are treated as byte offsets. Any
9405// signed or scaled indexing is extended to the XLEN value type and scaled
9406// accordingly.
9407SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
9408 SelectionDAG &DAG) const {
9409 SDLoc DL(Op);
9410 const auto *MemSD = cast<MemSDNode>(Op.getNode());
9411 EVT MemVT = MemSD->getMemoryVT();
9412 MachineMemOperand *MMO = MemSD->getMemOperand();
9413 SDValue Chain = MemSD->getChain();
9414 SDValue BasePtr = MemSD->getBasePtr();
9415
9416 bool IsTruncatingStore = false;
9417 SDValue Index, Mask, Val, VL;
9418
9419 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
9420 Index = VPSN->getIndex();
9421 Mask = VPSN->getMask();
9422 Val = VPSN->getValue();
9423 VL = VPSN->getVectorLength();
9424 // VP doesn't support truncating stores.
9425 IsTruncatingStore = false;
9426 } else {
9427 // Else it must be a MSCATTER.
9428 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
9429 Index = MSN->getIndex();
9430 Mask = MSN->getMask();
9431 Val = MSN->getValue();
9432 IsTruncatingStore = MSN->isTruncatingStore();
9433 }
9434
9435 MVT VT = Val.getSimpleValueType();
9436 MVT IndexVT = Index.getSimpleValueType();
9437 MVT XLenVT = Subtarget.getXLenVT();
9438
9439  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
9440         "Unexpected VTs!");
9441 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
9442 // Targets have to explicitly opt-in for extending vector loads and
9443 // truncating vector stores.
9444 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
9445 (void)IsTruncatingStore;
9446
9447 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9448 // the selection of the masked intrinsics doesn't do this for us.
9449 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9450
9451 MVT ContainerVT = VT;
9452 if (VT.isFixedLengthVector()) {
9453 ContainerVT = getContainerForFixedLengthVector(VT);
9454 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
9455 ContainerVT.getVectorElementCount());
9456
9457 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
9458 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9459
9460 if (!IsUnmasked) {
9461 MVT MaskVT = getMaskTypeFor(ContainerVT);
9462 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9463 }
9464 }
9465
9466 if (!VL)
9467 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9468
9469 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
9470 IndexVT = IndexVT.changeVectorElementType(XLenVT);
9471 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
9472 VL);
9473    Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
9474                        TrueMask, VL);
9475 }
9476
9477 unsigned IntID =
9478 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
9479 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9480 Ops.push_back(Val);
9481 Ops.push_back(BasePtr);
9482 Ops.push_back(Index);
9483 if (!IsUnmasked)
9484 Ops.push_back(Mask);
9485 Ops.push_back(VL);
9486
9487  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
9488                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
9489}
9490
9491SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
9492 SelectionDAG &DAG) const {
9493 const MVT XLenVT = Subtarget.getXLenVT();
9494 SDLoc DL(Op);
9495 SDValue Chain = Op->getOperand(0);
9496 SDValue SysRegNo = DAG.getTargetConstant(
9497 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
9498 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
9499 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
9500
9501 // Encoding used for rounding mode in RISC-V differs from that used in
9502  // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
9503  // a table, which consists of a sequence of 4-bit fields, each representing the
9504  // corresponding FLT_ROUNDS mode.
9505 static const int Table =
9506      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
9507      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
9508      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
9509      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
9510      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
9511
9512 SDValue Shift =
9513 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
9514 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
9515 DAG.getConstant(Table, DL, XLenVT), Shift);
9516 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
9517 DAG.getConstant(7, DL, XLenVT));
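  // A worked example of the table lookup (a sketch, assuming the usual
  // llvm::RoundingMode values TowardZero=0, NearestTiesToEven=1,
  // TowardPositive=2, TowardNegative=3, NearestTiesToAway=4, and
  // RISCVFPRndMode RNE=0, RTZ=1, RDN=2, RUP=3, RMM=4): reading FRM == RTZ (1)
  // gives Shift = 4, and the low 3 bits of Table >> 4 are 0, i.e.
  // RoundingMode::TowardZero, which is what GET_ROUNDING should report for
  // round-toward-zero.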
9518
9519 return DAG.getMergeValues({Masked, Chain}, DL);
9520}
9521
9522SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
9523 SelectionDAG &DAG) const {
9524 const MVT XLenVT = Subtarget.getXLenVT();
9525 SDLoc DL(Op);
9526 SDValue Chain = Op->getOperand(0);
9527 SDValue RMValue = Op->getOperand(1);
9528 SDValue SysRegNo = DAG.getTargetConstant(
9529 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
9530
9531 // Encoding used for rounding mode in RISC-V differs from that used in
9532  // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into a
9533  // table, which consists of a sequence of 4-bit fields, each representing the
9534  // corresponding RISC-V mode.
9535 static const unsigned Table =
9536      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
9537      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
9538      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
9539      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
9540      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
9541
9542 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
9543 DAG.getConstant(2, DL, XLenVT));
9544 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
9545 DAG.getConstant(Table, DL, XLenVT), Shift);
9546 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
9547 DAG.getConstant(0x7, DL, XLenVT));
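  // The lookup runs the other way here: e.g. an incoming rounding mode of
  // TowardZero (0, under the same assumed numbering as above) selects the
  // 4-bit field holding RISCVFPRndMode::RTZ (1), which is then written to FRM.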
9548 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
9549 RMValue);
9550}
9551
9552SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
9553 SelectionDAG &DAG) const {
9554  MachineFunction &MF = DAG.getMachineFunction();
9555
9556 bool isRISCV64 = Subtarget.is64Bit();
9557 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9558
9559 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
9560 return DAG.getFrameIndex(FI, PtrVT);
9561}
9562
9563// Returns the opcode of the target-specific SDNode that implements the 32-bit
9564// form of the given Opcode.
9565static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
9566 switch (Opcode) {
9567 default:
9568 llvm_unreachable("Unexpected opcode");
9569 case ISD::SHL:
9570 return RISCVISD::SLLW;
9571 case ISD::SRA:
9572 return RISCVISD::SRAW;
9573 case ISD::SRL:
9574 return RISCVISD::SRLW;
9575 case ISD::SDIV:
9576 return RISCVISD::DIVW;
9577 case ISD::UDIV:
9578 return RISCVISD::DIVUW;
9579 case ISD::UREM:
9580 return RISCVISD::REMUW;
9581 case ISD::ROTL:
9582 return RISCVISD::ROLW;
9583 case ISD::ROTR:
9584 return RISCVISD::RORW;
9585 }
9586}
9587
9588// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
9589// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
9590// otherwise be promoted to i64, making it difficult to select the
9591 // SLLW/DIVUW/.../*W instructions later on, because the fact that the operation
9592 // was originally of type i8/i16/i32 is lost.
9593 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
9594                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
9595 SDLoc DL(N);
9596 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
9597 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
9598 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
9599 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
9600 // ReplaceNodeResults requires we maintain the same type for the return value.
9601 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
9602}
9603
9604 // Converts the given 32-bit operation to an i64 operation with sign-extension
9605 // semantics, to reduce the number of sign-extension instructions needed.
9606 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
9607   SDLoc DL(N);
9608 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9609 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
9610 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
9611 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
9612 DAG.getValueType(MVT::i32));
9613 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
9614}
9615
9616 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
9617                                              SmallVectorImpl<SDValue> &Results,
9618                                              SelectionDAG &DAG) const {
9619 SDLoc DL(N);
9620 switch (N->getOpcode()) {
9621 default:
9622 llvm_unreachable("Don't know how to custom type legalize this operation!");
9623   case ISD::STRICT_FP_TO_SINT:
9624   case ISD::STRICT_FP_TO_UINT:
9625   case ISD::FP_TO_SINT:
9626 case ISD::FP_TO_UINT: {
9627 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9628 "Unexpected custom legalisation");
9629 bool IsStrict = N->isStrictFPOpcode();
9630 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
9631 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
9632 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
9633 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
9634         TargetLowering::TypeSoftenFloat) {
9635       if (!isTypeLegal(Op0.getValueType()))
9636 return;
9637 if (IsStrict) {
9638 SDValue Chain = N->getOperand(0);
9639         // In the absence of Zfh, promote f16 to f32, then convert.
9640 if (Op0.getValueType() == MVT::f16 &&
9641 !Subtarget.hasStdExtZfhOrZhinx()) {
9642 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
9643 {Chain, Op0});
9644 Chain = Op0.getValue(1);
9645 }
9646 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
9647                               : RISCVISD::STRICT_FCVT_WU_RV64;
9648       SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
9649 SDValue Res = DAG.getNode(
9650 Opc, DL, VTs, Chain, Op0,
9651 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
9652 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9653 Results.push_back(Res.getValue(1));
9654 return;
9655 }
9656       // In the absence of Zfh, promote f16 to f32, then convert.
9657 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
9658 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
9659
9660 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
9661 SDValue Res =
9662 DAG.getNode(Opc, DL, MVT::i64, Op0,
9663 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
9664 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9665 return;
9666 }
9667 // If the FP type needs to be softened, emit a library call using the 'si'
9668 // version. If we left it to default legalization we'd end up with 'di'. If
9669 // the FP type doesn't need to be softened just let generic type
9670 // legalization promote the result type.
9671 RTLIB::Libcall LC;
9672 if (IsSigned)
9673 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
9674 else
9675 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
9676 MakeLibCallOptions CallOptions;
9677 EVT OpVT = Op0.getValueType();
9678 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
9679 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
9680 SDValue Result;
9681 std::tie(Result, Chain) =
9682 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
9683 Results.push_back(Result);
9684 if (IsStrict)
9685 Results.push_back(Chain);
9686 break;
9687 }
9688 case ISD::LROUND: {
9689 SDValue Op0 = N->getOperand(0);
9690 EVT Op0VT = Op0.getValueType();
9691 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
9692         TargetLowering::TypeSoftenFloat) {
9693       if (!isTypeLegal(Op0VT))
9694 return;
9695
9696       // In the absence of Zfh, promote f16 to f32, then convert.
9697 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
9698 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
9699
9700 SDValue Res =
9701 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
9702 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
9703 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9704 return;
9705 }
9706 // If the FP type needs to be softened, emit a library call to lround. We'll
9707 // need to truncate the result. We assume any value that doesn't fit in i32
9708 // is allowed to return an unspecified value.
9709 RTLIB::Libcall LC =
9710 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
9711 MakeLibCallOptions CallOptions;
9712 EVT OpVT = Op0.getValueType();
9713 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
9714 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
9715 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
9716 Results.push_back(Result);
9717 break;
9718 }
9719 case ISD::READCYCLECOUNTER: {
9720 assert(!Subtarget.is64Bit() &&
9721 "READCYCLECOUNTER only has custom type legalization on riscv32");
9722
9723 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
9724 SDValue RCW =
9725 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
9726
9727 Results.push_back(
9728 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
9729 Results.push_back(RCW.getValue(2));
9730 break;
9731 }
9732 case ISD::LOAD: {
9733 if (!ISD::isNON_EXTLoad(N))
9734 return;
9735
9736 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
9737 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
9738     LoadSDNode *Ld = cast<LoadSDNode>(N);
9739
9740 SDLoc dl(N);
9741 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
9742 Ld->getBasePtr(), Ld->getMemoryVT(),
9743 Ld->getMemOperand());
9744 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
9745 Results.push_back(Res.getValue(1));
9746 return;
9747 }
9748 case ISD::MUL: {
9749 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
9750 unsigned XLen = Subtarget.getXLen();
9751 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
9752 if (Size > XLen) {
9753 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
9754 SDValue LHS = N->getOperand(0);
9755 SDValue RHS = N->getOperand(1);
9756 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
9757
9758 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
9759 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
9760 // We need exactly one side to be unsigned.
9761 if (LHSIsU == RHSIsU)
9762 return;
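      // Why this pairing works (a sketch of the arithmetic): when S is
      // sign-extended from XLen bits and U is zero-extended, the low XLen bits
      // of the 2*XLen product come from a plain MUL of the truncated operands,
      // and the high XLen bits are exactly MULHSU(S, U); with both operands
      // signed (or both unsigned) a different high-multiply would be required.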
9763
9764 auto MakeMULPair = [&](SDValue S, SDValue U) {
9765 MVT XLenVT = Subtarget.getXLenVT();
9766 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
9767 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
9768 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
9769 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
9770 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
9771 };
9772
9773 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
9774 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
9775
9776 // The other operand should be signed, but still prefer MULH when
9777 // possible.
9778 if (RHSIsU && LHSIsS && !RHSIsS)
9779 Results.push_back(MakeMULPair(LHS, RHS));
9780 else if (LHSIsU && RHSIsS && !LHSIsS)
9781 Results.push_back(MakeMULPair(RHS, LHS));
9782
9783 return;
9784 }
9785 [[fallthrough]];
9786 }
9787 case ISD::ADD:
9788 case ISD::SUB:
9789 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9790 "Unexpected custom legalisation");
9791 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
9792 break;
9793 case ISD::SHL:
9794 case ISD::SRA:
9795 case ISD::SRL:
9796 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9797 "Unexpected custom legalisation");
9798 if (N->getOperand(1).getOpcode() != ISD::Constant) {
9799 // If we can use a BSET instruction, allow default promotion to apply.
9800 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
9801 isOneConstant(N->getOperand(0)))
9802 break;
9803 Results.push_back(customLegalizeToWOp(N, DAG));
9804 break;
9805 }
9806
9807 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
9808 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
9809 // shift amount.
9810 if (N->getOpcode() == ISD::SHL) {
9811 SDLoc DL(N);
9812 SDValue NewOp0 =
9813 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9814 SDValue NewOp1 =
9815 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
9816 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
9817 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
9818 DAG.getValueType(MVT::i32));
9819 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
9820 }
9821
9822 break;
9823 case ISD::ROTL:
9824 case ISD::ROTR:
9825 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9826 "Unexpected custom legalisation");
9827 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
9828 Subtarget.hasVendorXTHeadBb()) &&
9829 "Unexpected custom legalization");
9830 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
9831 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
9832 return;
9833 Results.push_back(customLegalizeToWOp(N, DAG));
9834 break;
9835 case ISD::CTTZ:
9836   case ISD::CTTZ_ZERO_UNDEF:
9837   case ISD::CTLZ:
9838 case ISD::CTLZ_ZERO_UNDEF: {
9839 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9840 "Unexpected custom legalisation");
9841
9842 SDValue NewOp0 =
9843 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9844 bool IsCTZ =
9845 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
9846 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
9847 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
9848 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9849 return;
9850 }
9851 case ISD::SDIV:
9852 case ISD::UDIV:
9853 case ISD::UREM: {
9854 MVT VT = N->getSimpleValueType(0);
9855 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
9856 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
9857 "Unexpected custom legalisation");
9858     // Don't promote division/remainder by a constant, since we should expand
9859     // those to a multiply by a magic constant.
9860     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
9861     if (N->getOperand(1).getOpcode() == ISD::Constant &&
9862 !isIntDivCheap(N->getValueType(0), Attr))
9863 return;
9864
9865 // If the input is i32, use ANY_EXTEND since the W instructions don't read
9866 // the upper 32 bits. For other types we need to sign or zero extend
9867 // based on the opcode.
9868 unsigned ExtOpc = ISD::ANY_EXTEND;
9869 if (VT != MVT::i32)
9870 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
9871                                            : ISD::ZERO_EXTEND;
9872
9873 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
9874 break;
9875 }
9876 case ISD::SADDO: {
9877 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9878 "Unexpected custom legalisation");
9879
9880 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
9881 // use the default legalization.
9882 if (!isa<ConstantSDNode>(N->getOperand(1)))
9883 return;
9884
9885 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
9886 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
9887 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
9888 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
9889 DAG.getValueType(MVT::i32));
9890
9891 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
9892
9893 // For an addition, the result should be less than one of the operands (LHS)
9894 // if and only if the other operand (RHS) is negative, otherwise there will
9895 // be overflow.
9896 // For a subtraction, the result should be less than one of the operands
9897 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9898 // otherwise there will be overflow.
9899 EVT OType = N->getValueType(1);
9900 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
9901 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
9902
9903 SDValue Overflow =
9904 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
9905 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9906 Results.push_back(Overflow);
9907 return;
9908 }
9909 case ISD::UADDO:
9910 case ISD::USUBO: {
9911 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9912 "Unexpected custom legalisation");
9913 bool IsAdd = N->getOpcode() == ISD::UADDO;
9914 // Create an ADDW or SUBW.
9915 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9916 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
9917 SDValue Res =
9918 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
9919 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
9920 DAG.getValueType(MVT::i32));
9921
9922 SDValue Overflow;
9923 if (IsAdd && isOneConstant(RHS)) {
9924 // Special case uaddo X, 1 overflowed if the addition result is 0.
9925 // The general case (X + C) < C is not necessarily beneficial. Although we
9926 // reduce the live range of X, we may introduce the materialization of
9927       // constant C, especially when the setcc result is used by a branch, because
9928       // RISC-V has no compare-with-constant branch instructions.
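      // For illustration: with a 32-bit X, (uaddo X, 1) wraps to 0 exactly when
      // X == 0xffffffff, so comparing the sign-extended ADDW result against 0
      // captures the carry out of bit 31 without materializing any constant.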
9929 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
9930 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
9931 } else if (IsAdd && isAllOnesConstant(RHS)) {
9932 // Special case uaddo X, -1 overflowed if X != 0.
9933 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
9934 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
9935 } else {
9936 // Sign extend the LHS and perform an unsigned compare with the ADDW
9937 // result. Since the inputs are sign extended from i32, this is equivalent
9938 // to comparing the lower 32 bits.
9939 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
9940 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
9941 IsAdd ? ISD::SETULT : ISD::SETUGT);
9942 }
9943
9944 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9945 Results.push_back(Overflow);
9946 return;
9947 }
9948 case ISD::UADDSAT:
9949 case ISD::USUBSAT: {
9950 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9951 "Unexpected custom legalisation");
9952 if (Subtarget.hasStdExtZbb()) {
9953 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
9954 // sign extend allows overflow of the lower 32 bits to be detected on
9955 // the promoted size.
9956 SDValue LHS =
9957 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
9958 SDValue RHS =
9959 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
9960 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
9961 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9962 return;
9963 }
9964
9965 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
9966 // promotion for UADDO/USUBO.
9967 Results.push_back(expandAddSubSat(N, DAG));
9968 return;
9969 }
9970 case ISD::ABS: {
9971 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9972 "Unexpected custom legalisation");
9973
9974 if (Subtarget.hasStdExtZbb()) {
9975 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
9976 // This allows us to remember that the result is sign extended. Expanding
9977 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
9978 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
9979 N->getOperand(0));
9980 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
9981 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
9982 return;
9983 }
9984
9985 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
9986 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9987
9988     // Freeze the source so we can increase its use count.
9989 Src = DAG.getFreeze(Src);
9990
9991 // Copy sign bit to all bits using the sraiw pattern.
9992 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
9993 DAG.getValueType(MVT::i32));
9994 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
9995 DAG.getConstant(31, DL, MVT::i64));
9996
9997 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
9998 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
9999
10000 // NOTE: The result is only required to be anyextended, but sext is
10001 // consistent with type legalization of sub.
10002 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
10003 DAG.getValueType(MVT::i32));
10004 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
10005 return;
10006 }
10007 case ISD::BITCAST: {
10008 EVT VT = N->getValueType(0);
10009 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
10010 SDValue Op0 = N->getOperand(0);
10011 EVT Op0VT = Op0.getValueType();
10012 MVT XLenVT = Subtarget.getXLenVT();
10013 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
10015 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
10016 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
10017 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
10018 Subtarget.hasStdExtZfbfmin()) {
10019 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
10020 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
10021 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
10022 Subtarget.hasStdExtFOrZfinx()) {
10023 SDValue FPConv =
10024 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
10025 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
10026 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
10027 Subtarget.hasStdExtZfa()) {
10028 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
10029 DAG.getVTList(MVT::i32, MVT::i32), Op0);
10030 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
10031 NewReg.getValue(0), NewReg.getValue(1));
10032 Results.push_back(RetReg);
10033 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
10034 isTypeLegal(Op0VT)) {
10035 // Custom-legalize bitcasts from fixed-length vector types to illegal
10036 // scalar types in order to improve codegen. Bitcast the vector to a
10037 // one-element vector type whose element type is the same as the result
10038 // type, and extract the first element.
10039 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
10040 if (isTypeLegal(BVT)) {
10041 SDValue BVec = DAG.getBitcast(BVT, Op0);
10042 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
10043 DAG.getConstant(0, DL, XLenVT)));
10044 }
10045 }
10046 break;
10047 }
10048 case RISCVISD::BREV8: {
10049 MVT VT = N->getSimpleValueType(0);
10050 MVT XLenVT = Subtarget.getXLenVT();
10051 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
10052 "Unexpected custom legalisation");
10053 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
10054 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
10055 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
10056 // ReplaceNodeResults requires we maintain the same type for the return
10057 // value.
10058 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
10059 break;
10060 }
10061   case ISD::EXTRACT_VECTOR_ELT: {
10062     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
10063 // type is illegal (currently only vXi64 RV32).
10064 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
10065 // transferred to the destination register. We issue two of these from the
10066 // upper- and lower- halves of the SEW-bit vector element, slid down to the
10067 // first element.
10068 SDValue Vec = N->getOperand(0);
10069 SDValue Idx = N->getOperand(1);
10070
10071 // The vector type hasn't been legalized yet so we can't issue target
10072 // specific nodes if it needs legalization.
10073 // FIXME: We would manually legalize if it's important.
10074 if (!isTypeLegal(Vec.getValueType()))
10075 return;
10076
10077 MVT VecVT = Vec.getSimpleValueType();
10078
10079 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
10080 VecVT.getVectorElementType() == MVT::i64 &&
10081 "Unexpected EXTRACT_VECTOR_ELT legalization");
10082
10083 // If this is a fixed vector, we need to convert it to a scalable vector.
10084 MVT ContainerVT = VecVT;
10085 if (VecVT.isFixedLengthVector()) {
10086 ContainerVT = getContainerForFixedLengthVector(VecVT);
10087 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10088 }
10089
10090 MVT XLenVT = Subtarget.getXLenVT();
10091
10092 // Use a VL of 1 to avoid processing more elements than we need.
10093 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10094
10095 // Unless the index is known to be 0, we must slide the vector down to get
10096 // the desired element into index 0.
10097 if (!isNullConstant(Idx)) {
10098 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10099 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10100 }
10101
10102 // Extract the lower XLEN bits of the correct vector element.
10103 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10104
10105 // To extract the upper XLEN bits of the vector element, shift the first
10106 // element right by 32 bits and re-extract the lower XLEN bits.
10107 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10108 DAG.getUNDEF(ContainerVT),
10109 DAG.getConstant(32, DL, XLenVT), VL);
10110 SDValue LShr32 =
10111 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
10112 DAG.getUNDEF(ContainerVT), Mask, VL);
10113
10114 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
10115
10116 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
10117 break;
10118 }
10119   case ISD::INTRINSIC_WO_CHAIN: {
10120     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10121 switch (IntNo) {
10122 default:
10123       llvm_unreachable(
10124           "Don't know how to custom type legalize this intrinsic!");
10125 case Intrinsic::experimental_get_vector_length: {
10126 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
10127 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10128 return;
10129 }
10130 case Intrinsic::riscv_orc_b:
10131 case Intrinsic::riscv_brev8:
10132 case Intrinsic::riscv_sha256sig0:
10133 case Intrinsic::riscv_sha256sig1:
10134 case Intrinsic::riscv_sha256sum0:
10135 case Intrinsic::riscv_sha256sum1:
10136 case Intrinsic::riscv_sm3p0:
10137 case Intrinsic::riscv_sm3p1: {
10138 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
10139 return;
10140 unsigned Opc;
10141 switch (IntNo) {
10142 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10143 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10144 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10145 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10146 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10147 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10148 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10149 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10150 }
10151
10152 SDValue NewOp =
10153 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10154 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
10155 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10156 return;
10157 }
10158 case Intrinsic::riscv_sm4ks:
10159 case Intrinsic::riscv_sm4ed: {
10160 unsigned Opc =
10161 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10162 SDValue NewOp0 =
10163 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10164 SDValue NewOp1 =
10165 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
10166 SDValue Res =
10167 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
10168 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10169 return;
10170 }
10171 case Intrinsic::riscv_clmul: {
10172 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
10173 return;
10174
10175 SDValue NewOp0 =
10176 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10177 SDValue NewOp1 =
10178 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
10179 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
10180 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10181 return;
10182 }
10183 case Intrinsic::riscv_clmulh:
10184 case Intrinsic::riscv_clmulr: {
10185 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
10186 return;
10187
10188 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
10189 // to the full 128-bit clmul result of multiplying two xlen values.
10190 // Perform clmulr or clmulh on the shifted values. Finally, extract the
10191 // upper 32 bits.
10192 //
10193 // The alternative is to mask the inputs to 32 bits and use clmul, but
10194 // that requires two shifts to mask each input without zext.w.
10195 // FIXME: If the inputs are known zero extended or could be freely
10196 // zero extended, the mask form would be better.
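      // Rough bit accounting for the shifted form (a sketch, assuming XLEN=64
      // here): for 32-bit inputs a and b, (a << 32) clmul (b << 32) equals
      // (a clmul b) << 64, so clmulh of the shifted values returns the low 64
      // bits of (a clmul b), and the final SRL by 32 leaves bits [32..63],
      // exactly the 32-bit clmulh result. clmulr works out analogously.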
10197 SDValue NewOp0 =
10198 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10199 SDValue NewOp1 =
10200 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
10201 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
10202 DAG.getConstant(32, DL, MVT::i64));
10203 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
10204 DAG.getConstant(32, DL, MVT::i64));
10205 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
10206                                                       : RISCVISD::CLMULR;
10207       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
10208 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
10209 DAG.getConstant(32, DL, MVT::i64));
10210 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10211 return;
10212 }
10213 case Intrinsic::riscv_vmv_x_s: {
10214 EVT VT = N->getValueType(0);
10215 MVT XLenVT = Subtarget.getXLenVT();
10216 if (VT.bitsLT(XLenVT)) {
10217 // Simple case just extract using vmv.x.s and truncate.
10218 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
10219 Subtarget.getXLenVT(), N->getOperand(1));
10220 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
10221 return;
10222 }
10223
10224 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
10225 "Unexpected custom legalization");
10226
10227 // We need to do the move in two steps.
10228 SDValue Vec = N->getOperand(1);
10229 MVT VecVT = Vec.getSimpleValueType();
10230
10231 // First extract the lower XLEN bits of the element.
10232 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10233
10234 // To extract the upper XLEN bits of the vector element, shift the first
10235 // element right by 32 bits and re-extract the lower XLEN bits.
10236 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
10237
10238 SDValue ThirtyTwoV =
10239 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
10240 DAG.getConstant(32, DL, XLenVT), VL);
10241 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
10242 DAG.getUNDEF(VecVT), Mask, VL);
10243 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
10244
10245 Results.push_back(
10246 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
10247 break;
10248 }
10249 }
10250 break;
10251 }
10252 case ISD::VECREDUCE_ADD:
10253 case ISD::VECREDUCE_AND:
10254 case ISD::VECREDUCE_OR:
10255 case ISD::VECREDUCE_XOR:
10256 case ISD::VECREDUCE_SMAX:
10257 case ISD::VECREDUCE_UMAX:
10258 case ISD::VECREDUCE_SMIN:
10259 case ISD::VECREDUCE_UMIN:
10260 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
10261 Results.push_back(V);
10262 break;
10263 case ISD::VP_REDUCE_ADD:
10264 case ISD::VP_REDUCE_AND:
10265 case ISD::VP_REDUCE_OR:
10266 case ISD::VP_REDUCE_XOR:
10267 case ISD::VP_REDUCE_SMAX:
10268 case ISD::VP_REDUCE_UMAX:
10269 case ISD::VP_REDUCE_SMIN:
10270 case ISD::VP_REDUCE_UMIN:
10271 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
10272 Results.push_back(V);
10273 break;
10274 case ISD::GET_ROUNDING: {
10275 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
10276 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
10277 Results.push_back(Res.getValue(0));
10278 Results.push_back(Res.getValue(1));
10279 break;
10280 }
10281 }
10282}
10283
10284// Try to fold (<bop> x, (reduction.<bop> vec, start))
10285 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
10286                                     const RISCVSubtarget &Subtarget) {
10287 auto BinOpToRVVReduce = [](unsigned Opc) {
10288 switch (Opc) {
10289 default:
10290       llvm_unreachable("Unhandled binary to transform reduction");
10291     case ISD::ADD:
10292       return RISCVISD::VECREDUCE_ADD_VL;
10293     case ISD::UMAX:
10294       return RISCVISD::VECREDUCE_UMAX_VL;
10295     case ISD::SMAX:
10296       return RISCVISD::VECREDUCE_SMAX_VL;
10297     case ISD::UMIN:
10298       return RISCVISD::VECREDUCE_UMIN_VL;
10299     case ISD::SMIN:
10300       return RISCVISD::VECREDUCE_SMIN_VL;
10301     case ISD::AND:
10302       return RISCVISD::VECREDUCE_AND_VL;
10303     case ISD::OR:
10304       return RISCVISD::VECREDUCE_OR_VL;
10305     case ISD::XOR:
10306       return RISCVISD::VECREDUCE_XOR_VL;
10307     case ISD::FADD:
10308       return RISCVISD::VECREDUCE_FADD_VL;
10309     case ISD::FMAXNUM:
10310       return RISCVISD::VECREDUCE_FMAX_VL;
10311     case ISD::FMINNUM:
10312       return RISCVISD::VECREDUCE_FMIN_VL;
10313     }
10314 };
10315
10316 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
10317 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10318 isNullConstant(V.getOperand(1)) &&
10319 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
10320 };
10321
10322 unsigned Opc = N->getOpcode();
10323 unsigned ReduceIdx;
10324 if (IsReduction(N->getOperand(0), Opc))
10325 ReduceIdx = 0;
10326 else if (IsReduction(N->getOperand(1), Opc))
10327 ReduceIdx = 1;
10328 else
10329 return SDValue();
10330
10331   // Skip if FADD disallows reassociation but the combiner needs it.
10332 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
10333 return SDValue();
10334
10335 SDValue Extract = N->getOperand(ReduceIdx);
10336 SDValue Reduce = Extract.getOperand(0);
10337 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
10338 return SDValue();
10339
10340 SDValue ScalarV = Reduce.getOperand(2);
10341 EVT ScalarVT = ScalarV.getValueType();
10342 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
10343 ScalarV.getOperand(0)->isUndef() &&
10344 isNullConstant(ScalarV.getOperand(2)))
10345 ScalarV = ScalarV.getOperand(1);
10346
10347 // Make sure that ScalarV is a splat with VL=1.
10348 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
10349 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
10350 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
10351 return SDValue();
10352
10353 if (!isNonZeroAVL(ScalarV.getOperand(2)))
10354 return SDValue();
10355
10356 // Check the scalar of ScalarV is neutral element
10357 // TODO: Deal with value other than neutral element.
10358 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
10359 0))
10360 return SDValue();
10361
10362 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
10363 // FIXME: We might be able to improve this if operand 0 is undef.
10364 if (!isNonZeroAVL(Reduce.getOperand(5)))
10365 return SDValue();
10366
10367 SDValue NewStart = N->getOperand(1 - ReduceIdx);
10368
10369 SDLoc DL(N);
10370 SDValue NewScalarV =
10371 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
10372 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
10373
10374 // If we looked through an INSERT_SUBVECTOR we need to restore it.
10375 if (ScalarVT != ScalarV.getValueType())
10376 NewScalarV =
10377 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
10378 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
10379
10380 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
10381 NewScalarV, Reduce.getOperand(3),
10382 Reduce.getOperand(4), Reduce.getOperand(5)};
10383 SDValue NewReduce =
10384 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
10385 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
10386 Extract.getOperand(1));
10387}
10388
10389// Optimize (add (shl x, c0), (shl y, c1)) ->
10390// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
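// For instance, with c0 = 5 and c1 = 8 (difference 3):
//   (add (shl x, 5), (shl y, 8)) -> (shl (add (shl y, 3), x), 5)
// which selects to a single SH3ADD followed by one SLLI.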
10391 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
10392                                   const RISCVSubtarget &Subtarget) {
10393 // Perform this optimization only in the zba extension.
10394 if (!Subtarget.hasStdExtZba())
10395 return SDValue();
10396
10397 // Skip for vector types and larger types.
10398 EVT VT = N->getValueType(0);
10399 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
10400 return SDValue();
10401
10402 // The two operand nodes must be SHL and have no other use.
10403 SDValue N0 = N->getOperand(0);
10404 SDValue N1 = N->getOperand(1);
10405 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
10406 !N0->hasOneUse() || !N1->hasOneUse())
10407 return SDValue();
10408
10409 // Check c0 and c1.
10410 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
10411 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
10412 if (!N0C || !N1C)
10413 return SDValue();
10414 int64_t C0 = N0C->getSExtValue();
10415 int64_t C1 = N1C->getSExtValue();
10416 if (C0 <= 0 || C1 <= 0)
10417 return SDValue();
10418
10419 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
10420 int64_t Bits = std::min(C0, C1);
10421 int64_t Diff = std::abs(C0 - C1);
10422 if (Diff != 1 && Diff != 2 && Diff != 3)
10423 return SDValue();
10424
10425 // Build nodes.
10426 SDLoc DL(N);
10427 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
10428 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
10429 SDValue NA0 =
10430 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
10431 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
10432 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
10433}
10434
10435// Combine a constant select operand into its use:
10436//
10437// (and (select cond, -1, c), x)
10438// -> (select cond, x, (and x, c)) [AllOnes=1]
10439// (or (select cond, 0, c), x)
10440// -> (select cond, x, (or x, c)) [AllOnes=0]
10441// (xor (select cond, 0, c), x)
10442// -> (select cond, x, (xor x, c)) [AllOnes=0]
10443// (add (select cond, 0, c), x)
10444// -> (select cond, x, (add x, c)) [AllOnes=0]
10445// (sub x, (select cond, 0, c))
10446// -> (select cond, x, (sub x, c)) [AllOnes=0]
10447 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
10448                                    SelectionDAG &DAG, bool AllOnes,
10449 const RISCVSubtarget &Subtarget) {
10450 EVT VT = N->getValueType(0);
10451
10452 // Skip vectors.
10453 if (VT.isVector())
10454 return SDValue();
10455
10456 if (!Subtarget.hasShortForwardBranchOpt() ||
10457 (Slct.getOpcode() != ISD::SELECT &&
10458 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
10459 !Slct.hasOneUse())
10460 return SDValue();
10461
10462 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
10463     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
10464   };
10465
10466 bool SwapSelectOps;
10467 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
10468 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
10469 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
10470 SDValue NonConstantVal;
10471 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
10472 SwapSelectOps = false;
10473 NonConstantVal = FalseVal;
10474 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
10475 SwapSelectOps = true;
10476 NonConstantVal = TrueVal;
10477 } else
10478 return SDValue();
10479
10480   // Slct is now known to be the desired identity constant when CC is true.
10481 TrueVal = OtherOp;
10482 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
10483 // Unless SwapSelectOps says the condition should be false.
10484 if (SwapSelectOps)
10485 std::swap(TrueVal, FalseVal);
10486
10487 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
10488 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
10489 {Slct.getOperand(0), Slct.getOperand(1),
10490 Slct.getOperand(2), TrueVal, FalseVal});
10491
10492 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
10493 {Slct.getOperand(0), TrueVal, FalseVal});
10494}
10495
10496// Attempt combineSelectAndUse on each operand of a commutative operator N.
10497 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
10498                                               bool AllOnes,
10499 const RISCVSubtarget &Subtarget) {
10500 SDValue N0 = N->getOperand(0);
10501 SDValue N1 = N->getOperand(1);
10502 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
10503 return Result;
10504 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
10505 return Result;
10506 return SDValue();
10507}
10508
10509// Transform (add (mul x, c0), c1) ->
10510// (add (mul (add x, c1/c0), c0), c1%c0).
10511// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
10512// that should be excluded is when c0*(c1/c0) is simm12, which will lead
10513// to an infinite loop in DAGCombine if transformed.
10514// Or transform (add (mul x, c0), c1) ->
10515// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
10516// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
10517// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
10518// lead to an infinite loop in DAGCombine if transformed.
10519// Or transform (add (mul x, c0), c1) ->
10520// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
10521// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
10522// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
10523// lead to an infinite loop in DAGCombine if transformed.
10524// Or transform (add (mul x, c0), c1) ->
10525// (mul (add x, c1/c0), c0).
10526// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
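// A concrete instance of the first pattern: with c0 = 100 and c1 = 4099, c1 is
// not a simm12, but c1/c0 = 40 and c1%c0 = 99 are, while c0*(c1/c0) = 4000 is
// not, so (add (mul x, 100), 4099) becomes (add (mul (add x, 40), 100), 99).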
10527 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
10528                                      const RISCVSubtarget &Subtarget) {
10529 // Skip for vector types and larger types.
10530 EVT VT = N->getValueType(0);
10531 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
10532 return SDValue();
10533 // The first operand node must be a MUL and has no other use.
10534 SDValue N0 = N->getOperand(0);
10535 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
10536 return SDValue();
10537 // Check if c0 and c1 match above conditions.
10538 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
10539 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
10540 if (!N0C || !N1C)
10541 return SDValue();
10542 // If N0C has multiple uses it's possible one of the cases in
10543 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
10544 // in an infinite loop.
10545 if (!N0C->hasOneUse())
10546 return SDValue();
10547 int64_t C0 = N0C->getSExtValue();
10548 int64_t C1 = N1C->getSExtValue();
10549 int64_t CA, CB;
10550 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
10551 return SDValue();
10552 // Search for proper CA (non-zero) and CB that both are simm12.
10553 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
10554 !isInt<12>(C0 * (C1 / C0))) {
10555 CA = C1 / C0;
10556 CB = C1 % C0;
10557 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
10558 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
10559 CA = C1 / C0 + 1;
10560 CB = C1 % C0 - C0;
10561 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
10562 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
10563 CA = C1 / C0 - 1;
10564 CB = C1 % C0 + C0;
10565 } else
10566 return SDValue();
10567 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
10568 SDLoc DL(N);
10569 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
10570 DAG.getConstant(CA, DL, VT));
10571 SDValue New1 =
10572 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
10573 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
10574}
10575
10576// Try to turn (add (xor (setcc X, Y), 1) -1) into (neg (setcc X, Y)).
10577 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
10578   SDValue N0 = N->getOperand(0);
10579 SDValue N1 = N->getOperand(1);
10580 EVT VT = N->getValueType(0);
10581 SDLoc DL(N);
10582
10583 // RHS should be -1.
10584 if (!isAllOnesConstant(N1))
10585 return SDValue();
10586
10587 // Look for an (xor (setcc X, Y), 1).
10588 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) ||
10589 N0.getOperand(0).getOpcode() != ISD::SETCC)
10590 return SDValue();
10591
10592 // Emit a negate of the setcc.
10593 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
10594 N0.getOperand(0));
10595}
10596
10597 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
10598                                  const RISCVSubtarget &Subtarget) {
10599 if (SDValue V = combineAddOfBooleanXor(N, DAG))
10600 return V;
10601 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
10602 return V;
10603 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
10604 return V;
10605 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10606 return V;
10607 // fold (add (select lhs, rhs, cc, 0, y), x) ->
10608 // (select lhs, rhs, cc, x, (add x, y))
10609 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
10610}
10611
10612// Try to turn a sub boolean RHS and constant LHS into an addi.
10613 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
10614   SDValue N0 = N->getOperand(0);
10615 SDValue N1 = N->getOperand(1);
10616 EVT VT = N->getValueType(0);
10617 SDLoc DL(N);
10618
10619 // Require a constant LHS.
10620 auto *N0C = dyn_cast<ConstantSDNode>(N0);
10621 if (!N0C)
10622 return SDValue();
10623
10624 // All our optimizations involve subtracting 1 from the immediate and forming
10625 // an ADDI. Make sure the new immediate is valid for an ADDI.
10626 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
10627 if (!ImmValMinus1.isSignedIntN(12))
10628 return SDValue();
10629
10630 SDValue NewLHS;
10631 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
10632 // (sub constant, (setcc x, y, eq/neq)) ->
10633 // (add (setcc x, y, neq/eq), constant - 1)
10634 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
10635 EVT SetCCOpVT = N1.getOperand(0).getValueType();
10636 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
10637 return SDValue();
10638 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
10639 NewLHS =
10640 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
10641 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
10642 N1.getOperand(0).getOpcode() == ISD::SETCC) {
10643 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
10644 // Since setcc returns a bool the xor is equivalent to 1-setcc.
10645 NewLHS = N1.getOperand(0);
10646 } else
10647 return SDValue();
10648
10649 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
10650 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
10651}
10652
10653 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
10654                                  const RISCVSubtarget &Subtarget) {
10655 if (SDValue V = combineSubOfBoolean(N, DAG))
10656 return V;
10657
10658 SDValue N0 = N->getOperand(0);
10659 SDValue N1 = N->getOperand(1);
10660 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
10661 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
10662 isNullConstant(N1.getOperand(1))) {
10663 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
10664 if (CCVal == ISD::SETLT) {
10665 EVT VT = N->getValueType(0);
10666 SDLoc DL(N);
10667 unsigned ShAmt = N0.getValueSizeInBits() - 1;
10668 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
10669 DAG.getConstant(ShAmt, DL, VT));
10670 }
10671 }
10672
10673 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
10674 // (select lhs, rhs, cc, x, (sub x, y))
10675 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
10676}
10677
10678// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
10679// Legalizing setcc can introduce xors like this. Doing this transform reduces
10680// the number of xors and may allow the xor to fold into a branch condition.
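// For example, when a and b are known to be 0/1:
//   (and (xor a, 1), (xor b, 1)) -> (xor (or a, b), 1)
// trading two xors for a single one.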
10681 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
10682   SDValue N0 = N->getOperand(0);
10683 SDValue N1 = N->getOperand(1);
10684 bool IsAnd = N->getOpcode() == ISD::AND;
10685
10686 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
10687 return SDValue();
10688
10689 if (!N0.hasOneUse() || !N1.hasOneUse())
10690 return SDValue();
10691
10692 SDValue N01 = N0.getOperand(1);
10693 SDValue N11 = N1.getOperand(1);
10694
10695 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
10696 // (xor X, -1) based on the upper bits of the other operand being 0. If the
10697 // operation is And, allow one of the Xors to use -1.
10698 if (isOneConstant(N01)) {
10699 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
10700 return SDValue();
10701 } else if (isOneConstant(N11)) {
10702 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
10703 if (!(IsAnd && isAllOnesConstant(N01)))
10704 return SDValue();
10705 } else
10706 return SDValue();
10707
10708 EVT VT = N->getValueType(0);
10709
10710 SDValue N00 = N0.getOperand(0);
10711 SDValue N10 = N1.getOperand(0);
10712
10713 // The LHS of the xors needs to be 0/1.
10714   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
10715   if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
10716 return SDValue();
10717
10718 // Invert the opcode and insert a new xor.
10719 SDLoc DL(N);
10720 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
10721 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
10722 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
10723}
10724
10725 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
10726                                       const RISCVSubtarget &Subtarget) {
10727 SDValue N0 = N->getOperand(0);
10728 EVT VT = N->getValueType(0);
10729
10730 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
10731 // extending X. This is safe since we only need the LSB after the shift and
10732 // shift amounts larger than 31 would produce poison. If we wait until
10733 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
10734 // to use a BEXT instruction.
10735 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
10736 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
10737 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
10738 SDLoc DL(N0);
10739 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
10740 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
10741 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
10742 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
10743 }
10744
10745 return SDValue();
10746}
10747
10748 // Combines two comparison operations and one logic operation into a single
10749 // selection operation (min, max) and a logic operation. Returns the newly
10750 // constructed node if the conditions for the optimization are satisfied.
10751 static SDValue performANDCombine(SDNode *N,
10752                                  TargetLowering::DAGCombinerInfo &DCI,
10753                                  const RISCVSubtarget &Subtarget) {
10754 SelectionDAG &DAG = DCI.DAG;
10755
10756 SDValue N0 = N->getOperand(0);
10757 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
10758 // extending X. This is safe since we only need the LSB after the shift and
10759 // shift amounts larger than 31 would produce poison. If we wait until
10760 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
10761 // to use a BEXT instruction.
10762 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
10763 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
10764 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
10765 N0.hasOneUse()) {
10766 SDLoc DL(N);
10767 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
10768 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
10769 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
10770 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
10771 DAG.getConstant(1, DL, MVT::i64));
10772 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
10773 }
10774
10775 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10776 return V;
10777
10778 if (DCI.isAfterLegalizeDAG())
10779 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
10780 return V;
10781
10782 // fold (and (select lhs, rhs, cc, -1, y), x) ->
10783 // (select lhs, rhs, cc, x, (and x, y))
10784 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
10785}
10786
10787 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
10788                                 const RISCVSubtarget &Subtarget) {
10789 SelectionDAG &DAG = DCI.DAG;
10790
10791 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10792 return V;
10793
10794 if (DCI.isAfterLegalizeDAG())
10795 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
10796 return V;
10797
10798 // fold (or (select cond, 0, y), x) ->
10799 // (select cond, x, (or x, y))
10800 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
10801}
10802
10803 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
10804                                  const RISCVSubtarget &Subtarget) {
10805 SDValue N0 = N->getOperand(0);
10806 SDValue N1 = N->getOperand(1);
10807
10808 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
10809 // NOTE: Assumes ROL being legal means ROLW is legal.
10810 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10811 if (N0.getOpcode() == RISCVISD::SLLW &&
10812       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
10813       TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
10814 SDLoc DL(N);
10815 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
10816 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
10817 }
10818
10819 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
10820 if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) {
10821 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
10822     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10823     if (ConstN00 && CC == ISD::SETLT) {
10824 EVT VT = N0.getValueType();
10825 SDLoc DL(N0);
10826 const APInt &Imm = ConstN00->getAPIntValue();
10827 if ((Imm + 1).isSignedIntN(12))
10828 return DAG.getSetCC(DL, VT, N0.getOperand(1),
10829 DAG.getConstant(Imm + 1, DL, VT), CC);
10830 }
10831 }
10832
10833 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10834 return V;
10835 // fold (xor (select cond, 0, y), x) ->
10836 // (select cond, x, (xor x, y))
10837 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
10838}
10839
10840 // According to the property that indexed load/store instructions zero-extend
10841 // their indices, \p narrowIndex tries to narrow the type of the index operand
10842 // if it matches the pattern (shl (zext x to ty), C) and
10843 // bits(x) + C < bits(ty).
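// For example, an index such as (shl (zext nxv2i8 %x to nxv2i64), splat 2) can
// only produce values that need 10 bits, so the zext/shl pair can be rebuilt at
// the narrower nxv2i16 element type before feeding the indexed access.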
10844 static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
10845   if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
10846 return SDValue();
10847
10848 SDValue N0 = N.getOperand(0);
10849 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
10850       N0.getOpcode() != RISCVISD::VZEXT_VL)
10851     return SDValue();
10852 if (!N0->hasOneUse())
10853 return SDValue();
10854
10855 APInt ShAmt;
10856 SDValue N1 = N.getOperand(1);
10857 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
10858 return SDValue();
10859
10860 SDLoc DL(N);
10861 SDValue Src = N0.getOperand(0);
10862 EVT SrcVT = Src.getValueType();
10863 unsigned SrcElen = SrcVT.getScalarSizeInBits();
10864 unsigned ShAmtV = ShAmt.getZExtValue();
10865 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
10866 NewElen = std::max(NewElen, 8U);
10867
10868 // Skip if NewElen is not narrower than the original extended type.
10869 if (NewElen >= N0.getValueType().getScalarSizeInBits())
10870 return SDValue();
10871
10872 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
10873 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
10874
10875 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
10876 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
10877 return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
10878}
10879
10880// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
10881// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
10882// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
10883// can become a sext.w instead of a shift pair.
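// For example (illustrative only): on RV64,
//   (seteq (and X, 0xffffffff), 0x87654321)
// becomes
//   (seteq (sext_inreg X, i32), 0xffffffff87654321),
// where the sign-extended constant plus a sext.w can be cheaper to materialize
// than the zero-extension shift pair.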
10884static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
10885 const RISCVSubtarget &Subtarget) {
10886 SDValue N0 = N->getOperand(0);
10887 SDValue N1 = N->getOperand(1);
10888 EVT VT = N->getValueType(0);
10889 EVT OpVT = N0.getValueType();
10890
10891 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
10892 return SDValue();
10893
10894 // RHS needs to be a constant.
10895 auto *N1C = dyn_cast<ConstantSDNode>(N1);
10896 if (!N1C)
10897 return SDValue();
10898
10899 // LHS needs to be (and X, 0xffffffff).
10900 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
10901 !isa<ConstantSDNode>(N0.getOperand(1)) ||
10902 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
10903 return SDValue();
10904
10905 // Looking for an equality compare.
10906 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10907 if (!isIntEqualitySetCC(Cond))
10908 return SDValue();
10909
10910 // Don't do this if the sign bit is provably zero; it will be turned back
10911 // into an AND.
10912 APInt SignMask = APInt::getOneBitSet(64, 31);
10913 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
10914 return SDValue();
10915
10916 const APInt &C1 = N1C->getAPIntValue();
10917
10918 SDLoc dl(N);
10919 // If the constant is larger than 2^32 - 1 it is impossible for both sides
10920 // to be equal.
10921 if (C1.getActiveBits() > 32)
10922 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
10923
10924 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
10925 N0.getOperand(0), DAG.getValueType(MVT::i32));
10926 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
10927 dl, OpVT), Cond);
10928}
10929
10930static SDValue
10931performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
10932 const RISCVSubtarget &Subtarget) {
10933 SDValue Src = N->getOperand(0);
10934 EVT VT = N->getValueType(0);
10935
10936 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
10937 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
10938 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
10939 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
10940 Src.getOperand(0));
10941
10942 return SDValue();
10943}
10944
10945namespace {
10946// Forward declaration of the structure holding the necessary information to
10947// apply a combine.
10948struct CombineResult;
10949
10950/// Helper class for folding sign/zero extensions.
10951/// In particular, this class is used for the following combines:
10952/// add_vl -> vwadd(u) | vwadd(u)_w
10953/// sub_vl -> vwsub(u) | vwsub(u)_w
10954/// mul_vl -> vwmul(u) | vwmul_su
10955///
10956/// An object of this class represents an operand of the operation we want to
10957/// combine.
10958/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
10959/// NodeExtensionHelper for `a` and one for `b`.
10960///
10961/// This class abstracts away how the extension is materialized and
10962/// how its Mask, VL, number of users affect the combines.
10963///
10964/// In particular:
10965/// - VWADD_W is conceptually == add(op0, sext(op1))
10966/// - VWADDU_W == add(op0, zext(op1))
10967/// - VWSUB_W == sub(op0, sext(op1))
10968/// - VWSUBU_W == sub(op0, zext(op1))
10969///
10970/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
10971/// zext|sext(smaller_value).
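///
/// For instance (an illustrative sketch, not part of the original comment):
/// for `add_vl (vsext_vl a), (vsext_vl b)` both operands report SupportsSExt,
/// so the operation can be rewritten as `vwadd_vl a, b` on the narrow type;
/// if only one operand is extended, the `_W` form (e.g. vwadd_w) is used
/// instead.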
10972struct NodeExtensionHelper {
10973 /// Records if this operand can be treated as if it were zero extended.
10974 bool SupportsZExt;
10975 /// Records if this operand can be treated as if it were sign extended.
10976 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
10977 /// instance, a splat constant (e.g., 3) would support being both sign and
10978 /// zero extended.
10979 bool SupportsSExt;
10980 /// This boolean captures whether we care if this operand would still be
10981 /// around after the folding happens.
10982 bool EnforceOneUse;
10983 /// Records if this operand's mask needs to match the mask of the operation
10984 /// that it will fold into.
10985 bool CheckMask;
10986 /// Value of the Mask for this operand.
10987 /// It may be SDValue().
10988 SDValue Mask;
10989 /// Value of the vector length operand.
10990 /// It may be SDValue().
10991 SDValue VL;
10992 /// Original value that this NodeExtensionHelper represents.
10993 SDValue OrigOperand;
10994
10995 /// Get the value feeding the extension or the value itself.
10996 /// E.g., for zext(a), this would return a.
10997 SDValue getSource() const {
10998 switch (OrigOperand.getOpcode()) {
10999 case RISCVISD::VSEXT_VL:
11000 case RISCVISD::VZEXT_VL:
11001 return OrigOperand.getOperand(0);
11002 default:
11003 return OrigOperand;
11004 }
11005 }
11006
11007 /// Check if this instance represents a splat.
11008 bool isSplat() const {
11009 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
11010 }
11011
11012 /// Get or create a value that can feed \p Root with the given extension \p
11013 /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
11014 /// \see ::getSource().
11015 SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
11016 std::optional<bool> SExt) const {
11017 if (!SExt.has_value())
11018 return OrigOperand;
11019
11020 MVT NarrowVT = getNarrowType(Root);
11021
11022 SDValue Source = getSource();
11023 if (Source.getValueType() == NarrowVT)
11024 return Source;
11025
11026 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
11027
11028 // If we need an extension, we should be changing the type.
11029 SDLoc DL(Root);
11030 auto [Mask, VL] = getMaskAndVL(Root);
11031 switch (OrigOperand.getOpcode()) {
11032 case RISCVISD::VSEXT_VL:
11033 case RISCVISD::VZEXT_VL:
11034 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
11035 case RISCVISD::VMV_V_X_VL:
11036 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
11037 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
11038 default:
11039 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
11040 // and that operand should already have the right NarrowVT so no
11041 // extension should be required at this point.
11042 llvm_unreachable("Unsupported opcode");
11043 }
11044 }
11045
11046 /// Helper function to get the narrow type for \p Root.
11047 /// The narrow type is the type of \p Root where we divided the size of each
11048 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
11049 /// \pre The size of the type of the elements of Root must be a multiple of 2
11050 /// and be at least 16.
11051 static MVT getNarrowType(const SDNode *Root) {
11052 MVT VT = Root->getSimpleValueType(0);
11053
11054 // Determine the narrow size.
11055 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
11056 assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
11057 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
11058 VT.getVectorElementCount());
11059 return NarrowVT;
11060 }
11061
11062 /// Return the opcode required to materialize the folding of the sign
11063 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
11064 /// both operands for \p Opcode.
11065 /// Put differently, get the opcode to materialize:
11066 /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
11067 /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
11068 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
11069 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
11070 switch (Opcode) {
11071 case RISCVISD::ADD_VL:
11072 case RISCVISD::VWADD_W_VL:
11073 case RISCVISD::VWADDU_W_VL:
11074 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
11075 case RISCVISD::MUL_VL:
11076 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
11077 case RISCVISD::SUB_VL:
11078 case RISCVISD::VWSUB_W_VL:
11079 case RISCVISD::VWSUBU_W_VL:
11080 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
11081 default:
11082 llvm_unreachable("Unexpected opcode");
11083 }
11084 }
11085
11086 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
11087 /// newOpcode(a, b).
11088 static unsigned getSUOpcode(unsigned Opcode) {
11089 assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
11090 return RISCVISD::VWMULSU_VL;
11091 }
11092
11093 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
11094 /// newOpcode(a, b).
11095 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
11096 switch (Opcode) {
11097 case RISCVISD::ADD_VL:
11098 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
11099 case RISCVISD::SUB_VL:
11100 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
11101 default:
11102 llvm_unreachable("Unexpected opcode");
11103 }
11104 }
11105
11106 using CombineToTry = std::function<std::optional<CombineResult>(
11107 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
11108 const NodeExtensionHelper & /*RHS*/)>;
11109
11110 /// Check if this node needs to be fully folded or extended for all users.
11111 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
11112
11113 /// Helper method to set the various fields of this struct based on the
11114 /// type of \p Root.
11115 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
11116 SupportsZExt = false;
11117 SupportsSExt = false;
11118 EnforceOneUse = true;
11119 CheckMask = true;
11120 switch (OrigOperand.getOpcode()) {
11121 case RISCVISD::VZEXT_VL:
11122 SupportsZExt = true;
11123 Mask = OrigOperand.getOperand(1);
11124 VL = OrigOperand.getOperand(2);
11125 break;
11126 case RISCVISD::VSEXT_VL:
11127 SupportsSExt = true;
11128 Mask = OrigOperand.getOperand(1);
11129 VL = OrigOperand.getOperand(2);
11130 break;
11131 case RISCVISD::VMV_V_X_VL: {
11132 // Historically, we didn't care about splat values not disappearing during
11133 // combines.
11134 EnforceOneUse = false;
11135 CheckMask = false;
11136 VL = OrigOperand.getOperand(2);
11137
11138 // The operand is a splat of a scalar.
11139
11140 // The passthru must be undef for tail agnostic.
11141 if (!OrigOperand.getOperand(0).isUndef())
11142 break;
11143
11144 // Get the scalar value.
11145 SDValue Op = OrigOperand.getOperand(1);
11146
11147 // See if we have enough sign bits or zero bits in the scalar to use a
11148 // widening opcode by splatting to smaller element size.
11149 MVT VT = Root->getSimpleValueType(0);
11150 unsigned EltBits = VT.getScalarSizeInBits();
11151 unsigned ScalarBits = Op.getValueSizeInBits();
11152 // Make sure we're getting all element bits from the scalar register.
11153 // FIXME: Support implicit sign extension of vmv.v.x?
11154 if (ScalarBits < EltBits)
11155 break;
11156
11157 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
11158 // If the narrow type cannot be expressed with a legal VMV,
11159 // this is not a valid candidate.
11160 if (NarrowSize < 8)
11161 break;
11162
11163 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
11164 SupportsSExt = true;
11165 if (DAG.MaskedValueIsZero(Op,
11166 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
11167 SupportsZExt = true;
11168 break;
11169 }
11170 default:
11171 break;
11172 }
11173 }
11174
11175 /// Check if \p Root supports any extension folding combines.
11176 static bool isSupportedRoot(const SDNode *Root) {
11177 switch (Root->getOpcode()) {
11178 case RISCVISD::ADD_VL:
11179 case RISCVISD::MUL_VL:
11180 case RISCVISD::VWADD_W_VL:
11181 case RISCVISD::VWADDU_W_VL:
11182 case RISCVISD::SUB_VL:
11183 case RISCVISD::VWSUB_W_VL:
11184 case RISCVISD::VWSUBU_W_VL:
11185 return true;
11186 default:
11187 return false;
11188 }
11189 }
11190
11191 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
11192 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
11193 assert(isSupportedRoot(Root) && "Trying to build a helper with an "
11194 "unsupported root");
11195 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
11196 OrigOperand = Root->getOperand(OperandIdx);
11197
11198 unsigned Opc = Root->getOpcode();
11199 switch (Opc) {
11200 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
11201 // <ADD|SUB>(LHS, S|ZEXT(RHS))
11202 case RISCVISD::VWADD_W_VL:
11203 case RISCVISD::VWADDU_W_VL:
11204 case RISCVISD::VWSUB_W_VL:
11205 case RISCVISD::VWSUBU_W_VL:
11206 if (OperandIdx == 1) {
11207 SupportsZExt =
11208 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
11209 SupportsSExt = !SupportsZExt;
11210 std::tie(Mask, VL) = getMaskAndVL(Root);
11211 CheckMask = true;
11212 // There's no existing extension here, so we don't have to worry about
11213 // making sure it gets removed.
11214 EnforceOneUse = false;
11215 break;
11216 }
11217 [[fallthrough]];
11218 default:
11219 fillUpExtensionSupport(Root, DAG);
11220 break;
11221 }
11222 }
11223
11224 /// Check if this operand is compatible with the given vector length \p VL.
11225 bool isVLCompatible(SDValue VL) const {
11226 return this->VL != SDValue() && this->VL == VL;
11227 }
11228
11229 /// Check if this operand is compatible with the given \p Mask.
11230 bool isMaskCompatible(SDValue Mask) const {
11231 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
11232 }
11233
11234 /// Helper function to get the Mask and VL from \p Root.
11235 static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
11236 assert(isSupportedRoot(Root) && "Unexpected root");
11237 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
11238 }
11239
11240 /// Check if the Mask and VL of this operand are compatible with \p Root.
11241 bool areVLAndMaskCompatible(const SDNode *Root) const {
11242 auto [Mask, VL] = getMaskAndVL(Root);
11243 return isMaskCompatible(Mask) && isVLCompatible(VL);
11244 }
11245
11246 /// Helper function to check if \p N is commutative with respect to the
11247 /// foldings that are supported by this class.
11248 static bool isCommutative(const SDNode *N) {
11249 switch (N->getOpcode()) {
11250 case RISCVISD::ADD_VL:
11251 case RISCVISD::MUL_VL:
11252 case RISCVISD::VWADD_W_VL:
11253 case RISCVISD::VWADDU_W_VL:
11254 return true;
11255 case RISCVISD::SUB_VL:
11256 case RISCVISD::VWSUB_W_VL:
11257 case RISCVISD::VWSUBU_W_VL:
11258 return false;
11259 default:
11260 llvm_unreachable("Unexpected opcode");
11261 }
11262 }
11263
11264 /// Get a list of combines to try for folding extensions in \p Root.
11265 /// Note that each returned CombineToTry function doesn't actually modify
11266 /// anything. Instead it produces an optional CombineResult that, if not
11267 /// std::nullopt, needs to be materialized for the combine to be applied.
11268 /// \see CombineResult::materialize.
11269 /// If the related CombineToTry function returns std::nullopt, that means the
11270 /// combine didn't match.
11271 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
11272};
11273
11274/// Helper structure that holds all the necessary information to materialize a
11275/// combine that does some extension folding.
11276struct CombineResult {
11277 /// Opcode to be generated when materializing the combine.
11278 unsigned TargetOpcode;
11279 // No value means no extension is needed. If extension is needed, the value
11280 // indicates if it needs to be sign extended.
11281 std::optional<bool> SExtLHS;
11282 std::optional<bool> SExtRHS;
11283 /// Root of the combine.
11284 SDNode *Root;
11285 /// LHS of the TargetOpcode.
11286 NodeExtensionHelper LHS;
11287 /// RHS of the TargetOpcode.
11288 NodeExtensionHelper RHS;
11289
11290 CombineResult(unsigned TargetOpcode, SDNode *Root,
11291 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
11292 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
11293 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
11294 Root(Root), LHS(LHS), RHS(RHS) {}
11295
11296 /// Return a value that uses TargetOpcode and that can be used to replace
11297 /// Root.
11298 /// The actual replacement is *not* done in that method.
11299 SDValue materialize(SelectionDAG &DAG) const {
11300 SDValue Mask, VL, Merge;
11301 std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
11302 Merge = Root->getOperand(2);
11303 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
11304 LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
11305 RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
11306 Mask, VL);
11307 }
11308};
11309
11310/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
11311/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
11312/// are zext) and LHS and RHS can be folded into Root.
11313/// AllowSExt and AllowZExt define which form `ext` can take in this pattern.
11314///
11315/// \note If the pattern can match with both zext and sext, the returned
11316/// CombineResult will feature the zext result.
11317///
11318/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11319/// can be used to apply the pattern.
11320static std::optional<CombineResult>
11321canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
11322 const NodeExtensionHelper &RHS, bool AllowSExt,
11323 bool AllowZExt) {
11324 assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
11325 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
11326 return std::nullopt;
11327 if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
11328 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
11329 Root->getOpcode(), /*IsSExt=*/false),
11330 Root, LHS, /*SExtLHS=*/false, RHS,
11331 /*SExtRHS=*/false);
11332 if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
11333 return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
11334 Root->getOpcode(), /*IsSExt=*/true),
11335 Root, LHS, /*SExtLHS=*/true, RHS,
11336 /*SExtRHS=*/true);
11337 return std::nullopt;
11338}
11339
11340/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
11341/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
11342/// are zext) and LHS and RHS can be folded into Root.
11343///
11344/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11345/// can be used to apply the pattern.
11346static std::optional<CombineResult>
11347canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
11348 const NodeExtensionHelper &RHS) {
11349 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
11350 /*AllowZExt=*/true);
11351}
11352
11353/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
11354///
11355/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11356/// can be used to apply the pattern.
11357static std::optional<CombineResult>
11358canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
11359 const NodeExtensionHelper &RHS) {
11360 if (!RHS.areVLAndMaskCompatible(Root))
11361 return std::nullopt;
11362
11363 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
11364 // sext/zext?
11365 // Control this behavior behind an option (AllowSplatInVW_W) for testing
11366 // purposes.
11367 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
11368 return CombineResult(
11369 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
11370 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
11371 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
11372 return CombineResult(
11373 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
11374 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
11375 return std::nullopt;
11376}
11377
11378/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
11379///
11380/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11381/// can be used to apply the pattern.
11382static std::optional<CombineResult>
11383canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
11384 const NodeExtensionHelper &RHS) {
11385 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
11386 /*AllowZExt=*/false);
11387}
11388
11389/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
11390///
11391/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11392/// can be used to apply the pattern.
11393static std::optional<CombineResult>
11394canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
11395 const NodeExtensionHelper &RHS) {
11396 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
11397 /*AllowZExt=*/true);
11398}
11399
11400/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
11401///
11402/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11403/// can be used to apply the pattern.
11404static std::optional<CombineResult>
11405canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
11406 const NodeExtensionHelper &RHS) {
11407 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
11408 return std::nullopt;
11409 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
11410 return std::nullopt;
11411 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
11412 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
11413}
11414
11415SmallVector<NodeExtensionHelper::CombineToTry>
11416NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
11417 SmallVector<CombineToTry> Strategies;
11418 switch (Root->getOpcode()) {
11419 case RISCVISD::ADD_VL:
11420 case RISCVISD::SUB_VL:
11421 // add|sub -> vwadd(u)|vwsub(u)
11422 Strategies.push_back(canFoldToVWWithSameExtension);
11423 // add|sub -> vwadd(u)_w|vwsub(u)_w
11424 Strategies.push_back(canFoldToVW_W);
11425 break;
11426 case RISCVISD::MUL_VL:
11427 // mul -> vwmul(u)
11428 Strategies.push_back(canFoldToVWWithSameExtension);
11429 // mul -> vwmulsu
11430 Strategies.push_back(canFoldToVW_SU);
11431 break;
11432 case RISCVISD::VWADD_W_VL:
11433 case RISCVISD::VWSUB_W_VL:
11434 // vwadd_w|vwsub_w -> vwadd|vwsub
11435 Strategies.push_back(canFoldToVWWithSEXT);
11436 break;
11437 case RISCVISD::VWADDU_W_VL:
11438 case RISCVISD::VWSUBU_W_VL:
11439 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
11440 Strategies.push_back(canFoldToVWWithZEXT);
11441 break;
11442 default:
11443 llvm_unreachable("Unexpected opcode");
11444 }
11445 return Strategies;
11446}
11447} // End anonymous namespace.
11448
11449/// Combine a binary operation to its equivalent VW or VW_W form.
11450/// The supported combines are:
11451/// add_vl -> vwadd(u) | vwadd(u)_w
11452/// sub_vl -> vwsub(u) | vwsub(u)_w
11453/// mul_vl -> vwmul(u) | vwmul_su
11454/// vwadd_w(u) -> vwadd(u)
11455/// vwsub_w(u) -> vwsub(u)
11456static SDValue
11457combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11458 SelectionDAG &DAG = DCI.DAG;
11459
11460 assert(NodeExtensionHelper::isSupportedRoot(N) &&
11461 "Shouldn't have called this method");
11462 SmallVector<SDNode *> Worklist;
11463 SmallSet<SDNode *, 8> Inserted;
11464 Worklist.push_back(N);
11465 Inserted.insert(N);
11466 SmallVector<CombineResult> CombinesToApply;
11467
11468 while (!Worklist.empty()) {
11469 SDNode *Root = Worklist.pop_back_val();
11470 if (!NodeExtensionHelper::isSupportedRoot(Root))
11471 return SDValue();
11472
11473 NodeExtensionHelper LHS(N, 0, DAG);
11474 NodeExtensionHelper RHS(N, 1, DAG);
11475 auto AppendUsersIfNeeded = [&Worklist,
11476 &Inserted](const NodeExtensionHelper &Op) {
11477 if (Op.needToPromoteOtherUsers()) {
11478 for (SDNode *TheUse : Op.OrigOperand->uses()) {
11479 if (Inserted.insert(TheUse).second)
11480 Worklist.push_back(TheUse);
11481 }
11482 }
11483 };
11484
11485 // Control the compile time by limiting the number of nodes we look at in
11486 // total.
11487 if (Inserted.size() > ExtensionMaxWebSize)
11488 return SDValue();
11489
11490 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
11491 NodeExtensionHelper::getSupportedFoldings(N);
11492
11493 assert(!FoldingStrategies.empty() && "Nothing to be folded");
11494 bool Matched = false;
11495 for (int Attempt = 0;
11496 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
11497 ++Attempt) {
11498
11499 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
11500 FoldingStrategies) {
11501 std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
11502 if (Res) {
11503 Matched = true;
11504 CombinesToApply.push_back(*Res);
11505 // All the inputs that are extended need to be folded; otherwise
11506 // we would be leaving both the old input (since it may still be used)
11507 // and the new one around.
11508 if (Res->SExtLHS.has_value())
11509 AppendUsersIfNeeded(LHS);
11510 if (Res->SExtRHS.has_value())
11511 AppendUsersIfNeeded(RHS);
11512 break;
11513 }
11514 }
11515 std::swap(LHS, RHS);
11516 }
11517 // Right now we do an all or nothing approach.
11518 if (!Matched)
11519 return SDValue();
11520 }
11521 // Store the value for the replacement of the input node separately.
11522 SDValue InputRootReplacement;
11523 // We do the RAUW after we materialize all the combines, because some replaced
11524 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
11525 // some of these nodes may appear in the NodeExtensionHelpers of some of the
11526 // yet-to-be-visited CombinesToApply roots.
11527 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
11528 ValuesToReplace.reserve(CombinesToApply.size());
11529 for (CombineResult Res : CombinesToApply) {
11530 SDValue NewValue = Res.materialize(DAG);
11531 if (!InputRootReplacement) {
11532 assert(Res.Root == N &&
11533 "First element is expected to be the current node");
11534 InputRootReplacement = NewValue;
11535 } else {
11536 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
11537 }
11538 }
11539 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
11540 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
11541 DCI.AddToWorklist(OldNewValues.second.getNode());
11542 }
11543 return InputRootReplacement;
11544}
11545
11546// Helper function for performMemPairCombine.
11547// Try to combine the memory loads/stores LSNode1 and LSNode2
11548// into a single memory pair operation.
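// For example (illustrative only): two adjacent i32 loads from Base+0 and
// Base+4 can be replaced by a single TH_LWD node producing both values, and
// the analogous TH_SWD/TH_SDD forms cover paired stores.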
11549static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
11550 LSBaseSDNode *LSNode2, SDValue BasePtr,
11551 uint64_t Imm) {
11552 SmallPtrSet<const SDNode *, 32> Visited;
11553 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
11554
11555 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
11556 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
11557 return SDValue();
11558
11559 MachineFunction &MF = DAG.getMachineFunction();
11560 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
11561
11562 // The new operation has twice the width.
11563 MVT XLenVT = Subtarget.getXLenVT();
11564 EVT MemVT = LSNode1->getMemoryVT();
11565 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
11566 MachineMemOperand *MMO = LSNode1->getMemOperand();
11567 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
11568 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
11569
11570 if (LSNode1->getOpcode() == ISD::LOAD) {
11571 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
11572 unsigned Opcode;
11573 if (MemVT == MVT::i32)
11574 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
11575 else
11576 Opcode = RISCVISD::TH_LDD;
11577
11578 SDValue Res = DAG.getMemIntrinsicNode(
11579 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
11580 {LSNode1->getChain(), BasePtr,
11581 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
11582 NewMemVT, NewMMO);
11583
11584 SDValue Node1 =
11585 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
11586 SDValue Node2 =
11587 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
11588
11589 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
11590 return Node1;
11591 } else {
11592 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
11593
11594 SDValue Res = DAG.getMemIntrinsicNode(
11595 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
11596 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
11597 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
11598 NewMemVT, NewMMO);
11599
11600 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
11601 return Res;
11602 }
11603}
11604
11605// Try to combine two adjacent loads/stores to a single pair instruction from
11606// the XTHeadMemPair vendor extension.
11607static SDValue performMemPairCombine(SDNode *N,
11608 TargetLowering::DAGCombinerInfo &DCI) {
11609 SelectionDAG &DAG = DCI.DAG;
11610 MachineFunction &MF = DAG.getMachineFunction();
11611 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
11612
11613 // Target does not support load/store pair.
11614 if (!Subtarget.hasVendorXTHeadMemPair())
11615 return SDValue();
11616
11617 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
11618 EVT MemVT = LSNode1->getMemoryVT();
11619 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
11620
11621 // No volatile, indexed or atomic loads/stores.
11622 if (!LSNode1->isSimple() || LSNode1->isIndexed())
11623 return SDValue();
11624
11625 // Function to get a base + constant representation from a memory value.
11626 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
11627 if (Ptr->getOpcode() == ISD::ADD)
11628 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
11629 return {Ptr->getOperand(0), C1->getZExtValue()};
11630 return {Ptr, 0};
11631 };
11632
11633 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
11634
11635 SDValue Chain = N->getOperand(0);
11636 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
11637 UI != UE; ++UI) {
11638 SDUse &Use = UI.getUse();
11639 if (Use.getUser() != N && Use.getResNo() == 0 &&
11640 Use.getUser()->getOpcode() == N->getOpcode()) {
11641 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
11642
11643 // No volatile, indexed or atomic loads/stores.
11644 if (!LSNode2->isSimple() || LSNode2->isIndexed())
11645 continue;
11646
11647 // Check if LSNode1 and LSNode2 have the same type and extension.
11648 if (LSNode1->getOpcode() == ISD::LOAD)
11649 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
11650 cast<LoadSDNode>(LSNode1)->getExtensionType())
11651 continue;
11652
11653 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
11654 continue;
11655
11656 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
11657
11658 // Check if the base pointer is the same for both instructions.
11659 if (Base1 != Base2)
11660 continue;
11661
11662 // Check if the offsets match the XTHeadMemPair encoding constraints.
11663 bool Valid = false;
11664 if (MemVT == MVT::i32) {
11665 // Check for adjacent i32 values and a 2-bit index.
11666 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
11667 Valid = true;
11668 } else if (MemVT == MVT::i64) {
11669 // Check for adjacent i64 values and a 2-bit index.
11670 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
11671 Valid = true;
11672 }
11673
11674 if (!Valid)
11675 continue;
11676
11677 // Try to combine.
11678 if (SDValue Res =
11679 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
11680 return Res;
11681 }
11682 }
11683
11684 return SDValue();
11685}
11686
11687// Fold
11688// (fp_to_int (froundeven X)) -> fcvt X, rne
11689// (fp_to_int (ftrunc X)) -> fcvt X, rtz
11690// (fp_to_int (ffloor X)) -> fcvt X, rdn
11691// (fp_to_int (fceil X)) -> fcvt X, rup
11692// (fp_to_int (fround X)) -> fcvt X, rmm
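// For example (an illustrative sketch): on RV64, (fp_to_sint (ffloor f64:X))
// can be selected as a single fcvt.l.d with the static "rdn" rounding mode,
// avoiding a separate floor lowering followed by a conversion.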
11693static SDValue performFP_TO_INTCombine(SDNode *N,
11694 TargetLowering::DAGCombinerInfo &DCI,
11695 const RISCVSubtarget &Subtarget) {
11696 SelectionDAG &DAG = DCI.DAG;
11697 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11698 MVT XLenVT = Subtarget.getXLenVT();
11699
11700 SDValue Src = N->getOperand(0);
11701
11702 // Don't do this for strict-fp Src.
11703 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
11704 return SDValue();
11705
11706 // Ensure the FP type is legal.
11707 if (!TLI.isTypeLegal(Src.getValueType()))
11708 return SDValue();
11709
11710 // Don't do this for f16 with Zfhmin and not Zfh.
11711 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
11712 return SDValue();
11713
11714 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
11715 // If the result is invalid, we didn't find a foldable instruction.
11716 // If the result is dynamic, then we found an frint which we don't yet
11717 // support. It will cause 7 to be written to the FRM CSR for vector.
11718 // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below.
11719 if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN)
11720 return SDValue();
11721
11722 SDLoc DL(N);
11723 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
11724 EVT VT = N->getValueType(0);
11725
11726 if (VT.isVector() && TLI.isTypeLegal(VT)) {
11727 MVT SrcVT = Src.getSimpleValueType();
11728 MVT SrcContainerVT = SrcVT;
11729 MVT ContainerVT = VT.getSimpleVT();
11730 SDValue XVal = Src.getOperand(0);
11731
11732 // For widening and narrowing conversions we just combine it into a
11733 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
11734 // end up getting lowered to their appropriate pseudo instructions based on
11735 // their operand types
11736 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
11737 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
11738 return SDValue();
11739
11740 // Make fixed-length vectors scalable first
11741 if (SrcVT.isFixedLengthVector()) {
11742 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
11743 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
11744 ContainerVT =
11745 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
11746 }
11747
11748 auto [Mask, VL] =
11749 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
11750
11751 SDValue FpToInt;
11752 if (FRM == RISCVFPRndMode::RTZ) {
11753 // Use the dedicated trunc static rounding mode if we're truncating so we
11754 // don't need to generate calls to fsrmi/fsrm
11755 unsigned Opc =
11756 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
11757 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
11758 } else {
11759 unsigned Opc =
11760 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
11761 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
11762 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
11763 }
11764
11765 // If converted from fixed-length to scalable, convert back
11766 if (VT.isFixedLengthVector())
11767 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
11768
11769 return FpToInt;
11770 }
11771
11772 // Only handle XLen or i32 types. Other types narrower than XLen will
11773 // eventually be legalized to XLenVT.
11774 if (VT != MVT::i32 && VT != XLenVT)
11775 return SDValue();
11776
11777 unsigned Opc;
11778 if (VT == XLenVT)
11779 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
11780 else
11781 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11782
11783 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
11784 DAG.getTargetConstant(FRM, DL, XLenVT));
11785 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
11786}
11787
11788// Fold
11789// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
11790// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
11791// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
11792// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
11793// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
11794static SDValue performFP_TO_INT_SATCombine(SDNode *N,
11795 TargetLowering::DAGCombinerInfo &DCI,
11796 const RISCVSubtarget &Subtarget) {
11797 SelectionDAG &DAG = DCI.DAG;
11798 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11799 MVT XLenVT = Subtarget.getXLenVT();
11800
11801 // Only handle XLen types. Other types narrower than XLen will eventually be
11802 // legalized to XLenVT.
11803 EVT DstVT = N->getValueType(0);
11804 if (DstVT != XLenVT)
11805 return SDValue();
11806
11807 SDValue Src = N->getOperand(0);
11808
11809 // Don't do this for strict-fp Src.
11810 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
11811 return SDValue();
11812
11813 // Ensure the FP type is also legal.
11814 if (!TLI.isTypeLegal(Src.getValueType()))
11815 return SDValue();
11816
11817 // Don't do this for f16 with Zfhmin and not Zfh.
11818 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
11819 return SDValue();
11820
11821 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11822
11823 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
11824 if (FRM == RISCVFPRndMode::Invalid)
11825 return SDValue();
11826
11827 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
11828
11829 unsigned Opc;
11830 if (SatVT == DstVT)
11831 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
11832 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
11833 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11834 else
11835 return SDValue();
11836 // FIXME: Support other SatVTs by clamping before or after the conversion.
11837
11838 Src = Src.getOperand(0);
11839
11840 SDLoc DL(N);
11841 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
11842 DAG.getTargetConstant(FRM, DL, XLenVT));
11843
11844 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
11845 // extend.
11846 if (Opc == RISCVISD::FCVT_WU_RV64)
11847 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
11848
11849 // RISC-V FP-to-int conversions saturate to the destination register size, but
11850 // don't produce 0 for nan.
11851 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
11852 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
11853}
11854
11855// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
11856// smaller than XLenVT.
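// For example (illustrative): for an i16 value, bswap swaps the two bytes and
// bitreverse then reverses all 16 bits, so the composition reverses the bits
// within each byte, which is exactly what brev8 computes.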
11857static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
11858 const RISCVSubtarget &Subtarget) {
11859 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
11860
11861 SDValue Src = N->getOperand(0);
11862 if (Src.getOpcode() != ISD::BSWAP)
11863 return SDValue();
11864
11865 EVT VT = N->getValueType(0);
11866 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
11868 return SDValue();
11869
11870 SDLoc DL(N);
11871 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
11872}
11873
11874// Convert from one FMA opcode to another based on whether we are negating the
11875// multiply result and/or the accumulator.
11876// NOTE: Only supports RVV operations with VL.
11877static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
11878 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
11879 if (NegMul) {
11880 // clang-format off
11881 switch (Opcode) {
11882 default: llvm_unreachable("Unexpected opcode");
11883 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
11884 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
11885 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
11886 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
11887 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
11888 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
11889 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
11890 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
11891 }
11892 // clang-format on
11893 }
11894
11895 // Negating the accumulator changes ADD<->SUB.
11896 if (NegAcc) {
11897 // clang-format off
11898 switch (Opcode) {
11899 default: llvm_unreachable("Unexpected opcode");
11900 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
11901 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
11902 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
11903 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
11904 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
11905 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
11906 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
11907 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
11908 }
11909 // clang-format on
11910 }
11911
11912 return Opcode;
11913}
11914
11916 // Fold FNEG_VL into FMA opcodes.
11917 // The first operand of strict-fp is chain.
11918 unsigned Offset = N->isTargetStrictFPOpcode();
11919 SDValue A = N->getOperand(0 + Offset);
11920 SDValue B = N->getOperand(1 + Offset);
11921 SDValue C = N->getOperand(2 + Offset);
11922 SDValue Mask = N->getOperand(3 + Offset);
11923 SDValue VL = N->getOperand(4 + Offset);
11924
11925 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
11926 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
11927 V.getOperand(2) == VL) {
11928 // Return the negated input.
11929 V = V.getOperand(0);
11930 return true;
11931 }
11932
11933 return false;
11934 };
11935
11936 bool NegA = invertIfNegative(A);
11937 bool NegB = invertIfNegative(B);
11938 bool NegC = invertIfNegative(C);
11939
11940 // If no operands are negated, we're done.
11941 if (!NegA && !NegB && !NegC)
11942 return SDValue();
11943
11944 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
11945 if (N->isTargetStrictFPOpcode())
11946 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
11947 {N->getOperand(0), A, B, C, Mask, VL});
11948 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
11949 VL);
11950}
11951
11954 return V;
11955
11956 // FIXME: Ignore strict opcodes for now.
11957 if (N->isTargetStrictFPOpcode())
11958 return SDValue();
11959
11960 // Try to form widening FMA.
11961 SDValue Op0 = N->getOperand(0);
11962 SDValue Op1 = N->getOperand(1);
11963 SDValue Mask = N->getOperand(3);
11964 SDValue VL = N->getOperand(4);
11965
11966 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
11968 return SDValue();
11969
11970 // TODO: Refactor to handle more complex cases similar to
11971 // combineBinOp_VLToVWBinOp_VL.
11972 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
11973 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
11974 return SDValue();
11975
11976 // Check the mask and VL are the same.
11977 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
11978 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
11979 return SDValue();
11980
11981 unsigned NewOpc;
11982 switch (N->getOpcode()) {
11983 default:
11984 llvm_unreachable("Unexpected opcode");
11985 case RISCVISD::VFMADD_VL:
11986 NewOpc = RISCVISD::VFWMADD_VL;
11987 break;
11988 case RISCVISD::VFNMSUB_VL:
11989 NewOpc = RISCVISD::VFWNMSUB_VL;
11990 break;
11991 case RISCVISD::VFNMADD_VL:
11992 NewOpc = RISCVISD::VFWNMADD_VL;
11993 break;
11994 case RISCVISD::VFMSUB_VL:
11995 NewOpc = RISCVISD::VFWMSUB_VL;
11996 break;
11997 }
11998
11999 Op0 = Op0.getOperand(0);
12000 Op1 = Op1.getOperand(0);
12001
12002 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
12003 N->getOperand(2), Mask, VL);
12004}
12005
12007 // FIXME: Ignore strict opcodes for now.
12008 assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
12009
12010 // Try to form widening multiply.
12011 SDValue Op0 = N->getOperand(0);
12012 SDValue Op1 = N->getOperand(1);
12013 SDValue Merge = N->getOperand(2);
12014 SDValue Mask = N->getOperand(3);
12015 SDValue VL = N->getOperand(4);
12016
12017 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
12019 return SDValue();
12020
12021 // TODO: Refactor to handle more complex cases similar to
12022 // combineBinOp_VLToVWBinOp_VL.
12023 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
12024 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
12025 return SDValue();
12026
12027 // Check the mask and VL are the same.
12028 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
12029 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
12030 return SDValue();
12031
12032 Op0 = Op0.getOperand(0);
12033 Op1 = Op1.getOperand(0);
12034
12035 return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
12036 Op1, Merge, Mask, VL);
12037}
12038
12040 SDValue Op0 = N->getOperand(0);
12041 SDValue Op1 = N->getOperand(1);
12042 SDValue Merge = N->getOperand(2);
12043 SDValue Mask = N->getOperand(3);
12044 SDValue VL = N->getOperand(4);
12045
12046 bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
12047
12048 // Look for foldable FP_EXTENDS.
12049 bool Op0IsExtend =
12050 Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
12051 (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
12052 bool Op1IsExtend =
12053 (Op0 == Op1 && Op0IsExtend) ||
12054 (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
12055
12056 // Check the mask and VL.
12057 if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
12058 Op0IsExtend = false;
12059 if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
12060 Op1IsExtend = false;
12061
12062 // Canonicalize.
12063 if (!Op1IsExtend) {
12064 // Sub requires at least operand 1 to be an extend.
12065 if (!IsAdd)
12066 return SDValue();
12067
12068 // Add is commutable; if the other operand is foldable, swap them.
12069 if (!Op0IsExtend)
12070 return SDValue();
12071
12072 std::swap(Op0, Op1);
12073 std::swap(Op0IsExtend, Op1IsExtend);
12074 }
12075
12076 // Op1 is a foldable extend. Op0 might be foldable.
12077 Op1 = Op1.getOperand(0);
12078 if (Op0IsExtend)
12079 Op0 = Op0.getOperand(0);
12080
12081 unsigned Opc;
12082 if (IsAdd)
12083 Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
12084 else
12085 Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
12086
12087 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
12088 VL);
12089}
12090
12091static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
12092 const RISCVSubtarget &Subtarget) {
12093 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
12094
12095 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
12096 return SDValue();
12097
12098 if (!isa<ConstantSDNode>(N->getOperand(1)))
12099 return SDValue();
12100 uint64_t ShAmt = N->getConstantOperandVal(1);
12101 if (ShAmt > 32)
12102 return SDValue();
12103
12104 SDValue N0 = N->getOperand(0);
12105
12106 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
12107 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
12108 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
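  // For instance (illustrative): with C1 = 3 and C2 = 5, SLLIW 3 + SRAIW 5
  // becomes SLLI 35 + SRAI 37, and the latter pair can use the compressed
  // c.slli/c.srai encodings.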
12109 if (ShAmt < 32 &&
12110 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
12111 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
12112 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
12113 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
12114 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
12115 if (LShAmt < 32) {
12116 SDLoc ShlDL(N0.getOperand(0));
12117 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
12118 N0.getOperand(0).getOperand(0),
12119 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
12120 SDLoc DL(N);
12121 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
12122 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
12123 }
12124 }
12125
12126 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
12127 // FIXME: Should this be a generic combine? There's a similar combine on X86.
12128 //
12129 // Also try these folds where an add or sub is in the middle.
12130 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
12131 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
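  // For example (illustrative): (sra (shl X, 32), 20) becomes
  // (shl (sext_inreg X, i32), 12), i.e. a sext.w followed by a single slli.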
12132 SDValue Shl;
12133 ConstantSDNode *AddC = nullptr;
12134
12135 // We might have an ADD or SUB between the SRA and SHL.
12136 bool IsAdd = N0.getOpcode() == ISD::ADD;
12137 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
12138 // Other operand needs to be a constant we can modify.
12139 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
12140 if (!AddC)
12141 return SDValue();
12142
12143 // AddC needs to have at least 32 trailing zeros.
12144 if (AddC->getAPIntValue().countr_zero() < 32)
12145 return SDValue();
12146
12147 // All users should be a shift by constant less than or equal to 32. This
12148 // ensures we'll do this optimization for each of them to produce an
12149 // add/sub+sext_inreg they can all share.
12150 for (SDNode *U : N0->uses()) {
12151 if (U->getOpcode() != ISD::SRA ||
12152 !isa<ConstantSDNode>(U->getOperand(1)) ||
12153 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
12154 return SDValue();
12155 }
12156
12157 Shl = N0.getOperand(IsAdd ? 0 : 1);
12158 } else {
12159 // Not an ADD or SUB.
12160 Shl = N0;
12161 }
12162
12163 // Look for a shift left by 32.
12164 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
12165 Shl.getConstantOperandVal(1) != 32)
12166 return SDValue();
12167
12168 // If we didn't look through an add/sub, then the shl should have one use.
12169 // If we did look through an add/sub, the sext_inreg we create is free so
12170 // we're only creating 2 new instructions. It's enough to only remove the
12171 // original sra+add/sub.
12172 if (!AddC && !Shl.hasOneUse())
12173 return SDValue();
12174
12175 SDLoc DL(N);
12176 SDValue In = Shl.getOperand(0);
12177
12178 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
12179 // constant.
12180 if (AddC) {
12181 SDValue ShiftedAddC =
12182 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
12183 if (IsAdd)
12184 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
12185 else
12186 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
12187 }
12188
12189 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
12190 DAG.getValueType(MVT::i32));
12191 if (ShAmt == 32)
12192 return SExt;
12193
12194 return DAG.getNode(
12195 ISD::SHL, DL, MVT::i64, SExt,
12196 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
12197}
12198
12199// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
12200// the result is used as the condition of a br_cc or select_cc we can invert,
12201// inverting the setcc is free, and Z is 0/1. The caller will invert the
12202// br_cc/select_cc.
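// For example (illustrative only): (and (seteq a, b), (xor c, 1)) with c known
// to be 0/1 becomes (or (setne a, b), c); the caller then inverts the
// consuming br_cc/select_cc so the overall condition is unchanged.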
12203static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
12204 bool IsAnd = Cond.getOpcode() == ISD::AND;
12205 if (!IsAnd && Cond.getOpcode() != ISD::OR)
12206 return SDValue();
12207
12208 if (!Cond.hasOneUse())
12209 return SDValue();
12210
12211 SDValue Setcc = Cond.getOperand(0);
12212 SDValue Xor = Cond.getOperand(1);
12213 // Canonicalize setcc to LHS.
12214 if (Setcc.getOpcode() != ISD::SETCC)
12215 std::swap(Setcc, Xor);
12216 // LHS should be a setcc and RHS should be an xor.
12217 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
12218 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
12219 return SDValue();
12220
12221 // If the condition is an And, SimplifyDemandedBits may have changed
12222 // (xor Z, 1) to (not Z).
12223 SDValue Xor1 = Xor.getOperand(1);
12224 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
12225 return SDValue();
12226
12227 EVT VT = Cond.getValueType();
12228 SDValue Xor0 = Xor.getOperand(0);
12229
12230 // The LHS of the xor needs to be 0/1.
12231 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12232 if (!DAG.MaskedValueIsZero(Xor0, Mask))
12233 return SDValue();
12234
12235 // We can only invert integer setccs.
12236 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
12237 if (!SetCCOpVT.isScalarInteger())
12238 return SDValue();
12239
12240 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
12241 if (ISD::isIntEqualitySetCC(CCVal)) {
12242 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
12243 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
12244 Setcc.getOperand(1), CCVal);
12245 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
12246 // Invert (setlt 0, X) by converting to (setlt X, 1).
12247 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
12248 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
12249 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
12250 // Invert (setlt X, 1) by converting to (setlt 0, X).
12251 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
12252 DAG.getConstant(0, SDLoc(Setcc), VT),
12253 Setcc.getOperand(0), CCVal);
12254 } else
12255 return SDValue();
12256
12257 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
12258 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
12259}
12260
12261// Perform common combines for BR_CC and SELECT_CC conditions.
12262static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
12263 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
12264 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12265
12266 // Since an arithmetic right shift always preserves the sign bit,
12267 // the shift can be omitted here.
12268 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
12269 // setge (sra X, N), 0 -> setge X, 0
12270 if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) {
12271 if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
12272 LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) {
12273 LHS = LHS.getOperand(0);
12274 return true;
12275 }
12276 }
12277
12278 if (!ISD::isIntEqualitySetCC(CCVal))
12279 return false;
12280
12281 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
12282 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
12283 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
12284 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
12285 // If we're looking for eq 0 instead of ne 0, we need to invert the
12286 // condition.
12287 bool Invert = CCVal == ISD::SETEQ;
12288 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
12289 if (Invert)
12290 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12291
12292 RHS = LHS.getOperand(1);
12293 LHS = LHS.getOperand(0);
12294 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
12295
12296 CC = DAG.getCondCode(CCVal);
12297 return true;
12298 }
12299
12300 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
12301 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
12302 RHS = LHS.getOperand(1);
12303 LHS = LHS.getOperand(0);
12304 return true;
12305 }
12306
12307 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
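  // For example (illustrative, RV64): testing bit 5 via
  // ((srl (and X, 32), 5), 0, ne) becomes ((shl X, 58), 0, lt), i.e. shift the
  // tested bit into the sign position and compare against zero.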
12308 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
12309 LHS.getOperand(1).getOpcode() == ISD::Constant) {
12310 SDValue LHS0 = LHS.getOperand(0);
12311 if (LHS0.getOpcode() == ISD::AND &&
12312 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
12313 uint64_t Mask = LHS0.getConstantOperandVal(1);
12314 uint64_t ShAmt = LHS.getConstantOperandVal(1);
12315 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
12316 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
12317 CC = DAG.getCondCode(CCVal);
12318
12319 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
12320 LHS = LHS0.getOperand(0);
12321 if (ShAmt != 0)
12322 LHS =
12323 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
12324 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
12325 return true;
12326 }
12327 }
12328 }
12329
12330 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
12331 // This can occur when legalizing some floating point comparisons.
12332 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
12333 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
12334 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12335 CC = DAG.getCondCode(CCVal);
12336 RHS = DAG.getConstant(0, DL, LHS.getValueType());
12337 return true;
12338 }
12339
12340 if (isNullConstant(RHS)) {
12341 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
12342 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12343 CC = DAG.getCondCode(CCVal);
12344 LHS = NewCond;
12345 return true;
12346 }
12347 }
12348
12349 return false;
12350}
12351
12352// Fold
12353// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
12354// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
12355// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
12356// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
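// For example (illustrative): (select C, (add Y, X), Y) performs the add only
// when C is true, which is equivalent to always adding (select C, X, 0) to Y.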
12357static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
12358 SDValue TrueVal, SDValue FalseVal,
12359 bool Swapped) {
12360 bool Commutative = true;
12361 switch (TrueVal.getOpcode()) {
12362 default:
12363 return SDValue();
12364 case ISD::SUB:
12365 Commutative = false;
12366 break;
12367 case ISD::ADD:
12368 case ISD::OR:
12369 case ISD::XOR:
12370 break;
12371 }
12372
12373 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
12374 return SDValue();
12375
12376 unsigned OpToFold;
12377 if (FalseVal == TrueVal.getOperand(0))
12378 OpToFold = 0;
12379 else if (Commutative && FalseVal == TrueVal.getOperand(1))
12380 OpToFold = 1;
12381 else
12382 return SDValue();
12383
12384 EVT VT = N->getValueType(0);
12385 SDLoc DL(N);
12386 SDValue Zero = DAG.getConstant(0, DL, VT);
12387 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
12388
12389 if (Swapped)
12390 std::swap(OtherOp, Zero);
12391 SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero);
12392 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
12393}
12394
12395// This tries to get rid of `select` and `icmp` that are being used to handle
12396// targets that do not support `cttz(0)`/`ctlz(0)`.
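// For example (illustrative): `x == 0 ? 0 : cttz(x)` on i32 becomes
// `cttz(x) & 31`, since the hardware ctz returns the bit width (32) for a zero
// input and masking with 31 folds that case back to 0.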
12397static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
12398 SDValue Cond = N->getOperand(0);
12399
12400 // This represents either CTTZ or CTLZ instruction.
12401 SDValue CountZeroes;
12402
12403 SDValue ValOnZero;
12404
12405 if (Cond.getOpcode() != ISD::SETCC)
12406 return SDValue();
12407
12408 if (!isNullConstant(Cond->getOperand(1)))
12409 return SDValue();
12410
12411 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
12412 if (CCVal == ISD::CondCode::SETEQ) {
12413 CountZeroes = N->getOperand(2);
12414 ValOnZero = N->getOperand(1);
12415 } else if (CCVal == ISD::CondCode::SETNE) {
12416 CountZeroes = N->getOperand(1);
12417 ValOnZero = N->getOperand(2);
12418 } else {
12419 return SDValue();
12420 }
12421
12422 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
12423 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
12424 CountZeroes = CountZeroes.getOperand(0);
12425
12426 if (CountZeroes.getOpcode() != ISD::CTTZ &&
12427 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
12428 CountZeroes.getOpcode() != ISD::CTLZ &&
12429 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
12430 return SDValue();
12431
12432 if (!isNullConstant(ValOnZero))
12433 return SDValue();
12434
12435 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
12436 if (Cond->getOperand(0) != CountZeroesArgument)
12437 return SDValue();
12438
12439 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
12440 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
12441 CountZeroes.getValueType(), CountZeroesArgument);
12442 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
12443 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
12444 CountZeroes.getValueType(), CountZeroesArgument);
12445 }
12446
12447 unsigned BitWidth = CountZeroes.getValueSizeInBits();
12448 SDValue BitWidthMinusOne =
12449 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
12450
12451 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
12452 CountZeroes, BitWidthMinusOne);
12453 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
12454}
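// Reasoning sketch (illustrative, not original text): for a power-of-two
// BitWidth, ISD::CTTZ/CTLZ of zero are defined to return BitWidth, so
//   select (x == 0), 0, (cttz x)  ==>  and (cttz x), (BitWidth - 1)
// is value-preserving: BitWidth & (BitWidth - 1) == 0, while for nonzero x the
// count is already below BitWidth and the AND is a no-op.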
12455
12456 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
12457 const RISCVSubtarget &Subtarget) {
12458 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
12459 return Folded;
12460
12461 if (Subtarget.hasShortForwardBranchOpt())
12462 return SDValue();
12463
12464 SDValue TrueVal = N->getOperand(1);
12465 SDValue FalseVal = N->getOperand(2);
12466 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
12467 return V;
12468 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
12469}
12470
12471// If we're concatenating a series of vector loads like
12472// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
12473// Then we can turn this into a strided load by widening the vector elements
12474// vlse32 p, stride=n
12475 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
12476 const RISCVSubtarget &Subtarget,
12477 const RISCVTargetLowering &TLI) {
12478 SDLoc DL(N);
12479 EVT VT = N->getValueType(0);
12480
12481 // Only perform this combine on legal MVTs.
12482 if (!TLI.isTypeLegal(VT))
12483 return SDValue();
12484
12485 // TODO: Potentially extend this to scalable vectors
12486 if (VT.isScalableVector())
12487 return SDValue();
12488
12489 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
12490 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
12491 !SDValue(BaseLd, 0).hasOneUse())
12492 return SDValue();
12493
12494 EVT BaseLdVT = BaseLd->getValueType(0);
12495 SDValue BasePtr = BaseLd->getBasePtr();
12496
12497 // Go through the loads and check that they're strided
12498 SDValue CurPtr = BasePtr;
12499 SDValue Stride;
12500 Align Align = BaseLd->getAlign();
12501
12502 for (SDValue Op : N->ops().drop_front()) {
12503 auto *Ld = dyn_cast<LoadSDNode>(Op);
12504 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
12505 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
12506 Ld->getValueType(0) != BaseLdVT)
12507 return SDValue();
12508
12509 SDValue Ptr = Ld->getBasePtr();
12510 // Check that each load's pointer is (add CurPtr, Stride)
12511 if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr)
12512 return SDValue();
12513 SDValue Offset = Ptr.getOperand(1);
12514 if (!Stride)
12515 Stride = Offset;
12516 else if (Offset != Stride)
12517 return SDValue();
12518
12519 // The common alignment is the most restrictive (smallest) of all the loads
12520 Align = std::min(Align, Ld->getAlign());
12521
12522 CurPtr = Ptr;
12523 }
12524
12525 // A special case is if the stride is exactly the width of one of the loads,
12526 // in which case it's contiguous and can be combined into a regular vle
12527 // without changing the element size
12528 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
12529 ConstStride &&
12530 ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) {
12531 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
12532 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(),
12533 VT.getStoreSize(), Align);
12534 // Can't do the combine if the load isn't naturally aligned with the element
12535 // type
12536 if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(),
12537 DAG.getDataLayout(), VT, *MMO))
12538 return SDValue();
12539
12540 SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO);
12541 for (SDValue Ld : N->ops())
12542 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad);
12543 return WideLoad;
12544 }
12545
12546 // Get the widened scalar type, e.g. v4i8 -> i32
12547 unsigned WideScalarBitWidth =
12548 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
12549 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
12550
12551 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
12552 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
12553 if (!TLI.isTypeLegal(WideVecVT))
12554 return SDValue();
12555
12556 // Check that the operation is legal
12557 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
12558 return SDValue();
12559
12560 MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT);
12561 SDValue VL =
12562 getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second;
12563 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12564 SDValue IntID =
12565 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT());
12566 SDValue Ops[] = {BaseLd->getChain(),
12567 IntID,
12568 DAG.getUNDEF(ContainerVT),
12569 BasePtr,
12570 Stride,
12571 VL};
12572
12573 uint64_t MemSize;
12574 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride))
12575 // total size = (elsize * n) + (stride - elsize) * (n-1)
12576 // = elsize + stride * (n-1)
12577 MemSize = WideScalarVT.getSizeInBits() +
12578 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
12579 else
12580 // If Stride isn't constant, then we can't know how much it will load
12581 MemSize = MemoryLocation::UnknownSize;
12582
12583 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
12584 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
12585 Align);
12586
12587 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
12588 Ops, WideVecVT, MMO);
12589 for (SDValue Ld : N->ops())
12590 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
12591
12592 // Note: Perform the bitcast before the convertFromScalableVector so we have
12593 // balanced pairs of convertFromScalable/convertToScalable
12594 SDValue Res = DAG.getBitcast(
12595 TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad);
12596 return convertFromScalableVector(VT, Res, DAG, Subtarget);
12597}
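// Illustrative example (assumed types, not original text): for
//   concat_vectors (load v4i8, p), (load v4i8, p+n), (load v4i8, p+2*n), ...
// each v4i8 load is viewed as a single i32 element, so the concat becomes a
// strided load of i32 elements with stride n (vlse32), or a plain wide vle
// when n equals the 4-byte load width and the data is therefore contiguous.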
12598
12599 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
12600 const RISCVSubtarget &Subtarget) {
12601 assert(N->getOpcode() == RISCVISD::ADD_VL);
12602 SDValue Addend = N->getOperand(0);
12603 SDValue MulOp = N->getOperand(1);
12604 SDValue AddMergeOp = N->getOperand(2);
12605
12606 if (!AddMergeOp.isUndef())
12607 return SDValue();
12608
12609 auto IsVWMulOpc = [](unsigned Opc) {
12610 switch (Opc) {
12611 case RISCVISD::VWMUL_VL:
12612 case RISCVISD::VWMULU_VL:
12613 case RISCVISD::VWMULSU_VL:
12614 return true;
12615 default:
12616 return false;
12617 }
12618 };
12619
12620 if (!IsVWMulOpc(MulOp.getOpcode()))
12621 std::swap(Addend, MulOp);
12622
12623 if (!IsVWMulOpc(MulOp.getOpcode()))
12624 return SDValue();
12625
12626 SDValue MulMergeOp = MulOp.getOperand(2);
12627
12628 if (!MulMergeOp.isUndef())
12629 return SDValue();
12630
12631 SDValue AddMask = N->getOperand(3);
12632 SDValue AddVL = N->getOperand(4);
12633 SDValue MulMask = MulOp.getOperand(3);
12634 SDValue MulVL = MulOp.getOperand(4);
12635
12636 if (AddMask != MulMask || AddVL != MulVL)
12637 return SDValue();
12638
12639 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
12640 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
12641 "Unexpected opcode after VWMACC_VL");
12642 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
12643 "Unexpected opcode after VWMACC_VL!");
12644 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
12645 "Unexpected opcode after VWMUL_VL!");
12646 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
12647 "Unexpected opcode after VWMUL_VL!");
12648
12649 SDLoc DL(N);
12650 EVT VT = N->getValueType(0);
12651 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
12652 AddVL};
12653 return DAG.getNode(Opc, DL, VT, Ops);
12654}
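// Illustrative note (not original text): this rewrites
//   (add_vl (vwmul{u,su}_vl x, y), z)  ==>  (vwmacc{u,su}_vl x, y, z)
// when both nodes share the same mask and VL and have undef merge operands.
// The static_asserts above guarantee the VWMACC* opcodes sit at the same
// offsets as the corresponding VWMUL* opcodes, so the new opcode can be
// derived by simple enum arithmetic.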
12655
12656 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
12657 DAGCombinerInfo &DCI) const {
12658 SelectionDAG &DAG = DCI.DAG;
12659
12660 // Helper to call SimplifyDemandedBits on an operand of N where only some low
12661 // bits are demanded. N will be added to the Worklist if it was not deleted.
12662 // Caller should return SDValue(N, 0) if this returns true.
12663 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
12664 SDValue Op = N->getOperand(OpNo);
12665 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
12666 if (!SimplifyDemandedBits(Op, Mask, DCI))
12667 return false;
12668
12669 if (N->getOpcode() != ISD::DELETED_NODE)
12670 DCI.AddToWorklist(N);
12671 return true;
12672 };
12673
12674 switch (N->getOpcode()) {
12675 default:
12676 break;
12677 case RISCVISD::SplitF64: {
12678 SDValue Op0 = N->getOperand(0);
12679 // If the input to SplitF64 is just BuildPairF64 then the operation is
12680 // redundant. Instead, use BuildPairF64's operands directly.
12681 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
12682 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
12683
12684 if (Op0->isUndef()) {
12685 SDValue Lo = DAG.getUNDEF(MVT::i32);
12686 SDValue Hi = DAG.getUNDEF(MVT::i32);
12687 return DCI.CombineTo(N, Lo, Hi);
12688 }
12689
12690 SDLoc DL(N);
12691
12692 // It's cheaper to materialise two 32-bit integers than to load a double
12693 // from the constant pool and transfer it to integer registers through the
12694 // stack.
12695 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
12696 APInt V = C->getValueAPF().bitcastToAPInt();
12697 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
12698 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
12699 return DCI.CombineTo(N, Lo, Hi);
12700 }
12701
12702 // This is a target-specific version of a DAGCombine performed in
12703 // DAGCombiner::visitBITCAST. It performs the equivalent of:
12704 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12705 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12706 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
12707 !Op0.getNode()->hasOneUse())
12708 break;
12709 SDValue NewSplitF64 =
12710 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
12711 Op0.getOperand(0));
12712 SDValue Lo = NewSplitF64.getValue(0);
12713 SDValue Hi = NewSplitF64.getValue(1);
12714 APInt SignBit = APInt::getSignMask(32);
12715 if (Op0.getOpcode() == ISD::FNEG) {
12716 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
12717 DAG.getConstant(SignBit, DL, MVT::i32));
12718 return DCI.CombineTo(N, Lo, NewHi);
12719 }
12720 assert(Op0.getOpcode() == ISD::FABS);
12721 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
12722 DAG.getConstant(~SignBit, DL, MVT::i32));
12723 return DCI.CombineTo(N, Lo, NewHi);
12724 }
12725 case RISCVISD::SLLW:
12726 case RISCVISD::SRAW:
12727 case RISCVISD::SRLW:
12728 case RISCVISD::RORW:
12729 case RISCVISD::ROLW: {
12730 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
12731 if (SimplifyDemandedLowBitsHelper(0, 32) ||
12732 SimplifyDemandedLowBitsHelper(1, 5))
12733 return SDValue(N, 0);
12734
12735 break;
12736 }
12737 case RISCVISD::CLZW:
12738 case RISCVISD::CTZW: {
12739 // Only the lower 32 bits of the first operand are read
12740 if (SimplifyDemandedLowBitsHelper(0, 32))
12741 return SDValue(N, 0);
12742 break;
12743 }
12744 case RISCVISD::FMV_W_X_RV64: {
12745 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
12746 // conversion is unnecessary and can be replaced with the
12747 // FMV_X_ANYEXTW_RV64 operand.
12748 SDValue Op0 = N->getOperand(0);
12749 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
12750 return Op0.getOperand(0);
12751 break;
12752 }
12753 case RISCVISD::FMV_X_ANYEXTH:
12754 case RISCVISD::FMV_X_ANYEXTW_RV64: {
12755 SDLoc DL(N);
12756 SDValue Op0 = N->getOperand(0);
12757 MVT VT = N->getSimpleValueType(0);
12758 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
12759 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
12760 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
12761 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
12762 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
12763 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
12764 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
12765 assert(Op0.getOperand(0).getValueType() == VT &&
12766 "Unexpected value type!");
12767 return Op0.getOperand(0);
12768 }
12769
12770 // This is a target-specific version of a DAGCombine performed in
12771 // DAGCombiner::visitBITCAST. It performs the equivalent of:
12772 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12773 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12774 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
12775 !Op0.getNode()->hasOneUse())
12776 break;
12777 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
12778 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
12779 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
12780 if (Op0.getOpcode() == ISD::FNEG)
12781 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
12782 DAG.getConstant(SignBit, DL, VT));
12783
12784 assert(Op0.getOpcode() == ISD::FABS);
12785 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
12786 DAG.getConstant(~SignBit, DL, VT));
12787 }
12788 case ISD::ADD:
12789 return performADDCombine(N, DAG, Subtarget);
12790 case ISD::SUB:
12791 return performSUBCombine(N, DAG, Subtarget);
12792 case ISD::AND:
12793 return performANDCombine(N, DCI, Subtarget);
12794 case ISD::OR:
12795 return performORCombine(N, DCI, Subtarget);
12796 case ISD::XOR:
12797 return performXORCombine(N, DAG, Subtarget);
12798 case ISD::FADD:
12799 case ISD::UMAX:
12800 case ISD::UMIN:
12801 case ISD::SMAX:
12802 case ISD::SMIN:
12803 case ISD::FMAXNUM:
12804 case ISD::FMINNUM:
12805 return combineBinOpToReduce(N, DAG, Subtarget);
12806 case ISD::SETCC:
12807 return performSETCCCombine(N, DAG, Subtarget);
12808 case ISD::SIGN_EXTEND_INREG:
12809 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
12810 case ISD::ZERO_EXTEND:
12811 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
12812 // type legalization. This is safe because fp_to_uint produces poison if
12813 // it overflows.
12814 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
12815 SDValue Src = N->getOperand(0);
12816 if (Src.getOpcode() == ISD::FP_TO_UINT &&
12817 isTypeLegal(Src.getOperand(0).getValueType()))
12818 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
12819 Src.getOperand(0));
12820 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
12821 isTypeLegal(Src.getOperand(1).getValueType())) {
12822 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12823 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
12824 Src.getOperand(0), Src.getOperand(1));
12825 DCI.CombineTo(N, Res);
12826 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
12827 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
12828 return SDValue(N, 0); // Return N so it doesn't get rechecked.
12829 }
12830 }
12831 return SDValue();
12832 case ISD::TRUNCATE:
12833 return performTRUNCATECombine(N, DAG, Subtarget);
12834 case ISD::SELECT:
12835 return performSELECTCombine(N, DAG, Subtarget);
12836 case RISCVISD::CZERO_EQZ:
12837 case RISCVISD::CZERO_NEZ:
12838 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
12839 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
12840 if (N->getOperand(1).getOpcode() == ISD::XOR &&
12841 isOneConstant(N->getOperand(1).getOperand(1))) {
12842 SDValue Cond = N->getOperand(1).getOperand(0);
12843 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
12844 if (DAG.MaskedValueIsZero(Cond, Mask)) {
12845 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
12846 ? RISCVISD::CZERO_NEZ
12847 : RISCVISD::CZERO_EQZ;
12848 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
12849 N->getOperand(0), Cond);
12850 }
12851 }
12852 return SDValue();
12853
12854 case RISCVISD::SELECT_CC: {
12855 // Transform
12856 SDValue LHS = N->getOperand(0);
12857 SDValue RHS = N->getOperand(1);
12858 SDValue CC = N->getOperand(2);
12859 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12860 SDValue TrueV = N->getOperand(3);
12861 SDValue FalseV = N->getOperand(4);
12862 SDLoc DL(N);
12863 EVT VT = N->getValueType(0);
12864
12865 // If the True and False values are the same, we don't need a select_cc.
12866 if (TrueV == FalseV)
12867 return TrueV;
12868
12869 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
12870 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
12871 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
12872 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
12873 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
12874 if (CCVal == ISD::CondCode::SETGE)
12875 std::swap(TrueV, FalseV);
12876
12877 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
12878 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
12879 // Only handle simm12, if it is not in this range, it can be considered as
12880 // register.
12881 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
12882 isInt<12>(TrueSImm - FalseSImm)) {
12883 SDValue SRA =
12884 DAG.getNode(ISD::SRA, DL, VT, LHS,
12885 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
12886 SDValue AND =
12887 DAG.getNode(ISD::AND, DL, VT, SRA,
12888 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
12889 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
12890 }
12891
12892 if (CCVal == ISD::CondCode::SETGE)
12893 std::swap(TrueV, FalseV);
12894 }
12895
12896 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
12897 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
12898 {LHS, RHS, CC, TrueV, FalseV});
12899
12900 if (!Subtarget.hasShortForwardBranchOpt()) {
12901 // (select c, -1, y) -> -c | y
12902 if (isAllOnesConstant(TrueV)) {
12903 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
12904 SDValue Neg = DAG.getNegative(C, DL, VT);
12905 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
12906 }
12907 // (select c, y, -1) -> -!c | y
12908 if (isAllOnesConstant(FalseV)) {
12909 SDValue C =
12910 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
12911 SDValue Neg = DAG.getNegative(C, DL, VT);
12912 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
12913 }
12914
12915 // (select c, 0, y) -> -!c & y
12916 if (isNullConstant(TrueV)) {
12917 SDValue C =
12918 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
12919 SDValue Neg = DAG.getNegative(C, DL, VT);
12920 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
12921 }
12922 // (select c, y, 0) -> -c & y
12923 if (isNullConstant(FalseV)) {
12924 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
12925 SDValue Neg = DAG.getNegative(C, DL, VT);
12926 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
12927 }
12928 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
12929 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
12930 if (((isOneConstant(FalseV) && LHS == TrueV &&
12931 CCVal == ISD::CondCode::SETNE) ||
12932 (isOneConstant(TrueV) && LHS == FalseV &&
12933 CCVal == ISD::CondCode::SETEQ)) &&
12934 isNullConstant(RHS)) {
12935 // freeze it to be safe.
12936 LHS = DAG.getFreeze(LHS);
12937 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
12938 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
12939 }
12940 }
12941
12942 return SDValue();
12943 }
12944 case RISCVISD::BR_CC: {
12945 SDValue LHS = N->getOperand(1);
12946 SDValue RHS = N->getOperand(2);
12947 SDValue CC = N->getOperand(3);
12948 SDLoc DL(N);
12949
12950 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
12951 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
12952 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
12953
12954 return SDValue();
12955 }
12956 case ISD::BITREVERSE:
12957 return performBITREVERSECombine(N, DAG, Subtarget);
12958 case ISD::FP_TO_SINT:
12959 case ISD::FP_TO_UINT:
12960 return performFP_TO_INTCombine(N, DCI, Subtarget);
12961 case ISD::FP_TO_SINT_SAT:
12962 case ISD::FP_TO_UINT_SAT:
12963 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
12964 case ISD::FCOPYSIGN: {
12965 EVT VT = N->getValueType(0);
12966 if (!VT.isVector())
12967 break;
12968 // There is a form of VFSGNJ which injects the negated sign of its second
12969 // operand. Try and bubble any FNEG up after the extend/round to produce
12970 // this optimized pattern. Avoid modifying cases where FP_ROUND and
12971 // TRUNC=1.
12972 SDValue In2 = N->getOperand(1);
12973 // Avoid cases where the extend/round has multiple uses, as duplicating
12974 // those is typically more expensive than removing a fneg.
12975 if (!In2.hasOneUse())
12976 break;
12977 if (In2.getOpcode() != ISD::FP_EXTEND &&
12978 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
12979 break;
12980 In2 = In2.getOperand(0);
12981 if (In2.getOpcode() != ISD::FNEG)
12982 break;
12983 SDLoc DL(N);
12984 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
12985 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
12986 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
12987 }
12988 case ISD::MGATHER:
12989 case ISD::MSCATTER:
12990 case ISD::VP_GATHER:
12991 case ISD::VP_SCATTER: {
12992 if (!DCI.isBeforeLegalize())
12993 break;
12994 SDValue Index, ScaleOp;
12995 bool IsIndexSigned = false;
12996 if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
12997 Index = VPGSN->getIndex();
12998 ScaleOp = VPGSN->getScale();
12999 IsIndexSigned = VPGSN->isIndexSigned();
13000 assert(!VPGSN->isIndexScaled() &&
13001 "Scaled gather/scatter should not be formed");
13002 } else {
13003 const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
13004 Index = MGSN->getIndex();
13005 ScaleOp = MGSN->getScale();
13006 IsIndexSigned = MGSN->isIndexSigned();
13007 assert(!MGSN->isIndexScaled() &&
13008 "Scaled gather/scatter should not be formed");
13009
13010 }
13011 EVT IndexVT = Index.getValueType();
13012 MVT XLenVT = Subtarget.getXLenVT();
13013 // RISC-V indexed loads only support the "unsigned unscaled" addressing
13014 // mode, so anything else must be manually legalized.
13015 bool NeedsIdxLegalization =
13016 (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
13017 if (!NeedsIdxLegalization)
13018 break;
13019
13020 SDLoc DL(N);
13021
13022 // Any index legalization should first promote to XLenVT, so we don't lose
13023 // bits when scaling. This may create an illegal index type so we let
13024 // LLVM's legalization take care of the splitting.
13025 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
13026 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
13027 IndexVT = IndexVT.changeVectorElementType(XLenVT);
13028 Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
13029 DL, IndexVT, Index);
13030 }
13031
13032 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
13033 if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
13034 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
13035 {VPGN->getChain(), VPGN->getBasePtr(), Index,
13036 ScaleOp, VPGN->getMask(),
13037 VPGN->getVectorLength()},
13038 VPGN->getMemOperand(), NewIndexTy);
13039 if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
13040 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
13041 {VPSN->getChain(), VPSN->getValue(),
13042 VPSN->getBasePtr(), Index, ScaleOp,
13043 VPSN->getMask(), VPSN->getVectorLength()},
13044 VPSN->getMemOperand(), NewIndexTy);
13045 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
13046 return DAG.getMaskedGather(
13047 N->getVTList(), MGN->getMemoryVT(), DL,
13048 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
13049 MGN->getBasePtr(), Index, ScaleOp},
13050 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
13051 const auto *MSN = cast<MaskedScatterSDNode>(N);
13052 return DAG.getMaskedScatter(
13053 N->getVTList(), MSN->getMemoryVT(), DL,
13054 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
13055 Index, ScaleOp},
13056 MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
13057 }
13058 case RISCVISD::SRA_VL:
13059 case RISCVISD::SRL_VL:
13060 case RISCVISD::SHL_VL: {
13061 SDValue ShAmt = N->getOperand(1);
13062 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
13063 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
13064 SDLoc DL(N);
13065 SDValue VL = N->getOperand(3);
13066 EVT VT = N->getValueType(0);
13067 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13068 ShAmt.getOperand(1), VL);
13069 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
13070 N->getOperand(2), N->getOperand(3), N->getOperand(4));
13071 }
13072 break;
13073 }
13074 case ISD::SRA:
13075 if (SDValue V = performSRACombine(N, DAG, Subtarget))
13076 return V;
13077 [[fallthrough]];
13078 case ISD::SRL:
13079 case ISD::SHL: {
13080 SDValue ShAmt = N->getOperand(1);
13081 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
13082 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
13083 SDLoc DL(N);
13084 EVT VT = N->getValueType(0);
13085 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13086 ShAmt.getOperand(1),
13087 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
13088 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
13089 }
13090 break;
13091 }
13092 case RISCVISD::ADD_VL:
13093 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
13094 return V;
13095 return combineToVWMACC(N, DAG, Subtarget);
13096 case RISCVISD::SUB_VL:
13097 case RISCVISD::VWADD_W_VL:
13098 case RISCVISD::VWADDU_W_VL:
13099 case RISCVISD::VWSUB_W_VL:
13100 case RISCVISD::VWSUBU_W_VL:
13101 case RISCVISD::MUL_VL:
13102 return combineBinOp_VLToVWBinOp_VL(N, DCI);
13103 case RISCVISD::VFMADD_VL:
13104 case RISCVISD::VFNMADD_VL:
13105 case RISCVISD::VFMSUB_VL:
13106 case RISCVISD::VFNMSUB_VL:
13107 case RISCVISD::STRICT_VFMADD_VL:
13108 case RISCVISD::STRICT_VFNMADD_VL:
13109 case RISCVISD::STRICT_VFMSUB_VL:
13110 case RISCVISD::STRICT_VFNMSUB_VL:
13111 return performVFMADD_VLCombine(N, DAG);
13112 case RISCVISD::FMUL_VL:
13113 return performVFMUL_VLCombine(N, DAG);
13114 case RISCVISD::FADD_VL:
13115 case RISCVISD::FSUB_VL:
13116 return performFADDSUB_VLCombine(N, DAG);
13117 case ISD::LOAD:
13118 case ISD::STORE: {
13119 if (DCI.isAfterLegalizeDAG())
13120 if (SDValue V = performMemPairCombine(N, DCI))
13121 return V;
13122
13123 if (N->getOpcode() != ISD::STORE)
13124 break;
13125
13126 auto *Store = cast<StoreSDNode>(N);
13127 SDValue Chain = Store->getChain();
13128 EVT MemVT = Store->getMemoryVT();
13129 SDValue Val = Store->getValue();
13130 SDLoc DL(N);
13131
13132 bool IsScalarizable =
13133 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
13134 Store->isSimple() &&
13135 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
13136 isPowerOf2_64(MemVT.getSizeInBits()) &&
13137 MemVT.getSizeInBits() <= Subtarget.getXLen();
13138
13139 // If sufficiently aligned we can scalarize stores of constant vectors of
13140 // any power-of-two size up to XLen bits, provided that they aren't too
13141 // expensive to materialize.
13142 // vsetivli zero, 2, e8, m1, ta, ma
13143 // vmv.v.i v8, 4
13144 // vse64.v v8, (a0)
13145 // ->
13146 // li a1, 1028
13147 // sh a1, 0(a0)
13148 if (DCI.isBeforeLegalize() && IsScalarizable &&
13149 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
13150 // Get the constant vector bits
13151 APInt NewC(Val.getValueSizeInBits(), 0);
13152 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
13153 if (Val.getOperand(i).isUndef())
13154 continue;
13155 NewC.insertBits(Val.getConstantOperandAPInt(i),
13156 i * Val.getScalarValueSizeInBits());
13157 }
13158 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
13159
13160 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
13161 Subtarget.getFeatureBits(), true) <= 2 &&
13162 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13163 NewVT, *Store->getMemOperand())) {
13164 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
13165 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
13166 Store->getPointerInfo(), Store->getOriginalAlign(),
13167 Store->getMemOperand()->getFlags());
13168 }
13169 }
13170
13171 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
13172 // vsetivli zero, 2, e16, m1, ta, ma
13173 // vle16.v v8, (a0)
13174 // vse16.v v8, (a1)
13175 if (auto *L = dyn_cast<LoadSDNode>(Val);
13176 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
13177 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
13178 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
13179 L->getMemoryVT() == MemVT) {
13180 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
13181 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13182 NewVT, *Store->getMemOperand()) &&
13183 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13184 NewVT, *L->getMemOperand())) {
13185 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
13186 L->getPointerInfo(), L->getOriginalAlign(),
13187 L->getMemOperand()->getFlags());
13188 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
13189 Store->getPointerInfo(), Store->getOriginalAlign(),
13190 Store->getMemOperand()->getFlags());
13191 }
13192 }
13193
13194 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
13195 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
13196 // any illegal types.
13197 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
13198 (DCI.isAfterLegalizeDAG() &&
13199 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13200 isNullConstant(Val.getOperand(1)))) {
13201 SDValue Src = Val.getOperand(0);
13202 MVT VecVT = Src.getSimpleValueType();
13203 // VecVT should be scalable and memory VT should match the element type.
13204 if (VecVT.isScalableVector() &&
13205 MemVT == VecVT.getVectorElementType()) {
13206 SDLoc DL(N);
13207 MVT MaskVT = getMaskTypeFor(VecVT);
13208 return DAG.getStoreVP(
13209 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
13210 DAG.getConstant(1, DL, MaskVT),
13211 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
13212 Store->getMemOperand(), Store->getAddressingMode(),
13213 Store->isTruncatingStore(), /*IsCompress*/ false);
13214 }
13215 }
13216
13217 break;
13218 }
13219 case ISD::SPLAT_VECTOR: {
13220 EVT VT = N->getValueType(0);
13221 // Only perform this combine on legal MVT types.
13222 if (!isTypeLegal(VT))
13223 break;
13224 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
13225 DAG, Subtarget))
13226 return Gather;
13227 break;
13228 }
13229 case ISD::CONCAT_VECTORS:
13230 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
13231 return V;
13232 break;
13233 case RISCVISD::VMV_V_X_VL: {
13234 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
13235 // scalar input.
13236 unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
13237 unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
13238 if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
13239 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
13240 return SDValue(N, 0);
13241
13242 break;
13243 }
13244 case RISCVISD::VFMV_S_F_VL: {
13245 SDValue Src = N->getOperand(1);
13246 // Try to remove vector->scalar->vector if the scalar->vector is inserting
13247 // into an undef vector.
13248 // TODO: Could use a vslide or vmv.v.v for non-undef.
13249 if (N->getOperand(0).isUndef() &&
13250 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13251 isNullConstant(Src.getOperand(1)) &&
13252 Src.getOperand(0).getValueType().isScalableVector()) {
13253 EVT VT = N->getValueType(0);
13254 EVT SrcVT = Src.getOperand(0).getValueType();
13256 // Widths match, just return the original vector.
13257 if (SrcVT == VT)
13258 return Src.getOperand(0);
13259 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
13260 }
13261 break;
13262 }
13263 case ISD::INTRINSIC_VOID:
13264 case ISD::INTRINSIC_W_CHAIN:
13265 case ISD::INTRINSIC_WO_CHAIN: {
13266 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
13267 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
13268 switch (IntNo) {
13269 // By default we do not combine any intrinsic.
13270 default:
13271 return SDValue();
13272 case Intrinsic::riscv_vcpop:
13273 case Intrinsic::riscv_vcpop_mask:
13274 case Intrinsic::riscv_vfirst:
13275 case Intrinsic::riscv_vfirst_mask: {
13276 SDValue VL = N->getOperand(2);
13277 if (IntNo == Intrinsic::riscv_vcpop_mask ||
13278 IntNo == Intrinsic::riscv_vfirst_mask)
13279 VL = N->getOperand(3);
13280 if (!isNullConstant(VL))
13281 return SDValue();
13282 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
13283 SDLoc DL(N);
13284 EVT VT = N->getValueType(0);
13285 if (IntNo == Intrinsic::riscv_vfirst ||
13286 IntNo == Intrinsic::riscv_vfirst_mask)
13287 return DAG.getConstant(-1, DL, VT);
13288 return DAG.getConstant(0, DL, VT);
13289 }
13290 case Intrinsic::riscv_vloxei:
13291 case Intrinsic::riscv_vloxei_mask:
13292 case Intrinsic::riscv_vluxei:
13293 case Intrinsic::riscv_vluxei_mask:
13294 case Intrinsic::riscv_vsoxei:
13295 case Intrinsic::riscv_vsoxei_mask:
13296 case Intrinsic::riscv_vsuxei:
13297 case Intrinsic::riscv_vsuxei_mask:
13298 if (SDValue V = narrowIndex(N->getOperand(4), DAG)) {
13299 SmallVector<SDValue, 8> Ops(N->ops());
13300 Ops[4] = V;
13301 const auto *MemSD = cast<MemIntrinsicSDNode>(N);
13302 return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(),
13303 Ops, MemSD->getMemoryVT(),
13304 MemSD->getMemOperand());
13305 }
13306 return SDValue();
13307 }
13308 }
13309 case ISD::BITCAST: {
13310 assert(Subtarget.useRVVForFixedLengthVectors());
13311 SDValue N0 = N->getOperand(0);
13312 EVT VT = N->getValueType(0);
13313 EVT SrcVT = N0.getValueType();
13314 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
13315 // type, widen both sides to avoid a trip through memory.
13316 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
13317 VT.isScalarInteger()) {
13318 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
13319 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
13320 Ops[0] = N0;
13321 SDLoc DL(N);
13322 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
13323 N0 = DAG.getBitcast(MVT::i8, N0);
13324 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
13325 }
13326
13327 return SDValue();
13328 }
13329 }
13330
13331 return SDValue();
13332}
13333
13334 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
13335 EVT XVT, unsigned KeptBits) const {
13336 // For vectors, we don't have a preference.
13337 if (XVT.isVector())
13338 return false;
13339
13340 if (XVT != MVT::i32 && XVT != MVT::i64)
13341 return false;
13342
13343 // We can use sext.w for RV64 or an srai 31 on RV32.
13344 if (KeptBits == 32 || KeptBits == 64)
13345 return true;
13346
13347 // With Zbb we can use sext.h/sext.b.
13348 return Subtarget.hasStdExtZbb() &&
13349 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
13350 KeptBits == 16);
13351}
13352
13353 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
13354 const SDNode *N, CombineLevel Level) const {
13355 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13356 N->getOpcode() == ISD::SRL) &&
13357 "Expected shift op");
13358
13359 // The following folds are only desirable if `(OP _, c1 << c2)` can be
13360 // materialised in fewer instructions than `(OP _, c1)`:
13361 //
13362 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13363 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13364 SDValue N0 = N->getOperand(0);
13365 EVT Ty = N0.getValueType();
13366 if (Ty.isScalarInteger() &&
13367 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
13368 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13369 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13370 if (C1 && C2) {
13371 const APInt &C1Int = C1->getAPIntValue();
13372 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
13373
13374 // We can materialise `c1 << c2` into an add immediate, so it's "free",
13375 // and the combine should happen, to potentially allow further combines
13376 // later.
13377 if (ShiftedC1Int.getSignificantBits() <= 64 &&
13378 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
13379 return true;
13380
13381 // We can materialise `c1` in an add immediate, so it's "free", and the
13382 // combine should be prevented.
13383 if (C1Int.getSignificantBits() <= 64 &&
13384 isLegalAddImmediate(C1Int.getSExtValue()))
13385 return false;
13386
13387 // Neither constant will fit into an immediate, so find materialisation
13388 // costs.
13389 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
13390 Subtarget.getFeatureBits(),
13391 /*CompressionCost*/true);
13392 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
13393 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
13394 /*CompressionCost*/true);
13395
13396 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
13397 // combine should be prevented.
13398 if (C1Cost < ShiftedC1Cost)
13399 return false;
13400 }
13401 }
13402 return true;
13403}
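// Worked example (hypothetical constants, not original text): for
//   (shl (add x, 1), 30)
// the shifted constant 1 << 30 is not a legal add-immediate while 1 is, so the
// hook returns false and the commute is blocked; materialising 0x40000000
// would need an extra lui compared with the original addi of 1.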
13404
13405 bool RISCVTargetLowering::targetShrinkDemandedConstant(
13406 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
13407 TargetLoweringOpt &TLO) const {
13408 // Delay this optimization as late as possible.
13409 if (!TLO.LegalOps)
13410 return false;
13411
13412 EVT VT = Op.getValueType();
13413 if (VT.isVector())
13414 return false;
13415
13416 unsigned Opcode = Op.getOpcode();
13417 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
13418 return false;
13419
13420 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13421 if (!C)
13422 return false;
13423
13424 const APInt &Mask = C->getAPIntValue();
13425
13426 // Clear all non-demanded bits initially.
13427 APInt ShrunkMask = Mask & DemandedBits;
13428
13429 // Try to make a smaller immediate by setting undemanded bits.
13430
13431 APInt ExpandedMask = Mask | ~DemandedBits;
13432
13433 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
13434 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
13435 };
13436 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
13437 if (NewMask == Mask)
13438 return true;
13439 SDLoc DL(Op);
13440 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
13441 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
13442 Op.getOperand(0), NewC);
13443 return TLO.CombineTo(Op, NewOp);
13444 };
13445
13446 // If the shrunk mask fits in sign extended 12 bits, let the target
13447 // independent code apply it.
13448 if (ShrunkMask.isSignedIntN(12))
13449 return false;
13450
13451 // And has a few special cases for zext.
13452 if (Opcode == ISD::AND) {
13453 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
13454 // otherwise use SLLI + SRLI.
13455 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
13456 if (IsLegalMask(NewMask))
13457 return UseMask(NewMask);
13458
13459 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
13460 if (VT == MVT::i64) {
13461 APInt NewMask = APInt(64, 0xffffffff);
13462 if (IsLegalMask(NewMask))
13463 return UseMask(NewMask);
13464 }
13465 }
13466
13467 // For the remaining optimizations, we need to be able to make a negative
13468 // number through a combination of mask and undemanded bits.
13469 if (!ExpandedMask.isNegative())
13470 return false;
13471
13472 // What is the fewest number of bits we need to represent the negative number.
13473 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
13474
13475 // Try to make a 12 bit negative immediate. If that fails try to make a 32
13476 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
13477 // If we can't create a simm12, we shouldn't change opaque constants.
13478 APInt NewMask = ShrunkMask;
13479 if (MinSignedBits <= 12)
13480 NewMask.setBitsFrom(11);
13481 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
13482 NewMask.setBitsFrom(31);
13483 else
13484 return false;
13485
13486 // Check that our new mask is a subset of the demanded mask.
13487 assert(IsLegalMask(NewMask));
13488 return UseMask(NewMask);
13489}
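// Worked example (hypothetical operands, not original text): for
//   (and X, 0x1FFFF) with only the low 16 bits demanded,
// ShrunkMask is 0xFFFF, which does not fit in a simm12 but lies between the
// shrunk and expanded masks, so the constant is widened to 0xFFFF and the AND
// can be selected as zext.h (or slli+srli) instead of materialising 0x1FFFF.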
13490
13491static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
13492 static const uint64_t GREVMasks[] = {
13493 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
13494 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
13495
13496 for (unsigned Stage = 0; Stage != 6; ++Stage) {
13497 unsigned Shift = 1 << Stage;
13498 if (ShAmt & Shift) {
13499 uint64_t Mask = GREVMasks[Stage];
13500 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
13501 if (IsGORC)
13502 Res |= x;
13503 x = Res;
13504 }
13505 }
13506
13507 return x;
13508}
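// Illustrative note (not original text): with ShAmt == 7 this models the
// Zbkb/Zbb instructions used below: IsGORC=false behaves like brev8 (reverse
// the bits within each byte, e.g. 0x01 in a byte becomes 0x80), while
// IsGORC=true behaves like orc.b (each byte becomes 0xFF if any bit is set).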
13509
13510 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13511 KnownBits &Known,
13512 const APInt &DemandedElts,
13513 const SelectionDAG &DAG,
13514 unsigned Depth) const {
13515 unsigned BitWidth = Known.getBitWidth();
13516 unsigned Opc = Op.getOpcode();
13517 assert((Opc >= ISD::BUILTIN_OP_END ||
13518 Opc == ISD::INTRINSIC_WO_CHAIN ||
13519 Opc == ISD::INTRINSIC_W_CHAIN ||
13520 Opc == ISD::INTRINSIC_VOID) &&
13521 "Should use MaskedValueIsZero if you don't know whether Op"
13522 " is a target node!");
13523
13524 Known.resetAll();
13525 switch (Opc) {
13526 default: break;
13527 case RISCVISD::SELECT_CC: {
13528 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
13529 // If we don't know any bits, early out.
13530 if (Known.isUnknown())
13531 break;
13532 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
13533
13534 // Only known if known in both the LHS and RHS.
13535 Known = Known.intersectWith(Known2);
13536 break;
13537 }
13538 case RISCVISD::CZERO_EQZ:
13539 case RISCVISD::CZERO_NEZ:
13540 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13541 // Result is either all zero or operand 0. We can propagate zeros, but not
13542 // ones.
13543 Known.One.clearAllBits();
13544 break;
13545 case RISCVISD::REMUW: {
13546 KnownBits Known2;
13547 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
13548 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
13549 // We only care about the lower 32 bits.
13550 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
13551 // Restore the original width by sign extending.
13552 Known = Known.sext(BitWidth);
13553 break;
13554 }
13555 case RISCVISD::DIVUW: {
13556 KnownBits Known2;
13557 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
13558 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
13559 // We only care about the lower 32 bits.
13560 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
13561 // Restore the original width by sign extending.
13562 Known = Known.sext(BitWidth);
13563 break;
13564 }
13565 case RISCVISD::CTZW: {
13566 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13567 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
13568 unsigned LowBits = llvm::bit_width(PossibleTZ);
13569 Known.Zero.setBitsFrom(LowBits);
13570 break;
13571 }
13572 case RISCVISD::CLZW: {
13573 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13574 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
13575 unsigned LowBits = llvm::bit_width(PossibleLZ);
13576 Known.Zero.setBitsFrom(LowBits);
13577 break;
13578 }
13579 case RISCVISD::BREV8:
13580 case RISCVISD::ORC_B: {
13581 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
13582 // control value of 7 is equivalent to brev8 and orc.b.
13583 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13584 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
13585 // To compute zeros, we need to invert the value and invert it back after.
13586 Known.Zero =
13587 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
13588 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
13589 break;
13590 }
13591 case RISCVISD::READ_VLENB: {
13592 // We can use the minimum and maximum VLEN values to bound VLENB. We
13593 // know VLEN must be a power of two.
13594 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
13595 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
13596 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
13597 Known.Zero.setLowBits(Log2_32(MinVLenB));
13598 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
13599 if (MaxVLenB == MinVLenB)
13600 Known.One.setBit(Log2_32(MinVLenB));
13601 break;
13602 }
13603 case RISCVISD::FPCLASS: {
13604 // fclass will only set one of the low 10 bits.
13605 Known.Zero.setBitsFrom(10);
13606 break;
13607 }
13608 case ISD::INTRINSIC_W_CHAIN:
13609 case ISD::INTRINSIC_WO_CHAIN: {
13610 unsigned IntNo =
13611 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
13612 switch (IntNo) {
13613 default:
13614 // We can't do anything for most intrinsics.
13615 break;
13616 case Intrinsic::riscv_vsetvli:
13617 case Intrinsic::riscv_vsetvlimax:
13618 // Assume that VL output is <= 65536.
13619 // TODO: Take SEW and LMUL into account.
13620 if (BitWidth > 17)
13621 Known.Zero.setBitsFrom(17);
13622 break;
13623 }
13624 break;
13625 }
13626 }
13627}
13628
13629 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
13630 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
13631 unsigned Depth) const {
13632 switch (Op.getOpcode()) {
13633 default:
13634 break;
13635 case RISCVISD::SELECT_CC: {
13636 unsigned Tmp =
13637 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
13638 if (Tmp == 1) return 1; // Early out.
13639 unsigned Tmp2 =
13640 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
13641 return std::min(Tmp, Tmp2);
13642 }
13643 case RISCVISD::CZERO_EQZ:
13644 case RISCVISD::CZERO_NEZ:
13645 // Output is either all zero or operand 0. We can propagate sign bit count
13646 // from operand 0.
13647 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
13648 case RISCVISD::ABSW: {
13649 // We expand this at isel to negw+max. The result will have 33 sign bits
13650 // if the input has at least 33 sign bits.
13651 unsigned Tmp =
13652 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
13653 if (Tmp < 33) return 1;
13654 return 33;
13655 }
13656 case RISCVISD::SLLW:
13657 case RISCVISD::SRAW:
13658 case RISCVISD::SRLW:
13659 case RISCVISD::DIVW:
13660 case RISCVISD::DIVUW:
13661 case RISCVISD::REMUW:
13662 case RISCVISD::ROLW:
13663 case RISCVISD::RORW:
13664 case RISCVISD::FCVT_W_RV64:
13665 case RISCVISD::FCVT_WU_RV64:
13666 case RISCVISD::STRICT_FCVT_W_RV64:
13667 case RISCVISD::STRICT_FCVT_WU_RV64:
13668 // TODO: As the result is sign-extended, this is conservatively correct. A
13669 // more precise answer could be calculated for SRAW depending on known
13670 // bits in the shift amount.
13671 return 33;
13672 case RISCVISD::VMV_X_S: {
13673 // The number of sign bits of the scalar result is computed by obtaining the
13674 // element type of the input vector operand, subtracting its width from the
13675 // XLEN, and then adding one (sign bit within the element type). If the
13676 // element type is wider than XLen, the least-significant XLEN bits are
13677 // taken.
13678 unsigned XLen = Subtarget.getXLen();
13679 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
13680 if (EltBits <= XLen)
13681 return XLen - EltBits + 1;
13682 break;
13683 }
13684 case ISD::INTRINSIC_W_CHAIN: {
13685 unsigned IntNo = Op.getConstantOperandVal(1);
13686 switch (IntNo) {
13687 default:
13688 break;
13689 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
13690 case Intrinsic::riscv_masked_atomicrmw_add_i64:
13691 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
13692 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
13693 case Intrinsic::riscv_masked_atomicrmw_max_i64:
13694 case Intrinsic::riscv_masked_atomicrmw_min_i64:
13695 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
13696 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
13697 case Intrinsic::riscv_masked_cmpxchg_i64:
13698 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
13699 // narrow atomic operation. These are implemented using atomic
13700 // operations at the minimum supported atomicrmw/cmpxchg width whose
13701 // result is then sign extended to XLEN. With +A, the minimum width is
13702 // 32 for both RV64 and RV32.
13703 assert(Subtarget.getXLen() == 64);
13704 assert(getMinCmpXchgSizeInBits() == 32);
13705 assert(Subtarget.hasStdExtA());
13706 return 33;
13707 }
13708 }
13709 }
13710
13711 return 1;
13712}
13713
13714const Constant *
13715 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
13716 assert(Ld && "Unexpected null LoadSDNode");
13717 if (!ISD::isNormalLoad(Ld))
13718 return nullptr;
13719
13720 SDValue Ptr = Ld->getBasePtr();
13721
13722 // Only constant pools with no offset are supported.
13723 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
13724 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
13725 if (!CNode || CNode->isMachineConstantPoolEntry() ||
13726 CNode->getOffset() != 0)
13727 return nullptr;
13728
13729 return CNode;
13730 };
13731
13732 // Simple case, LLA.
13733 if (Ptr.getOpcode() == RISCVISD::LLA) {
13734 auto *CNode = GetSupportedConstantPool(Ptr);
13735 if (!CNode || CNode->getTargetFlags() != 0)
13736 return nullptr;
13737
13738 return CNode->getConstVal();
13739 }
13740
13741 // Look for a HI and ADD_LO pair.
13742 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
13743 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
13744 return nullptr;
13745
13746 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
13747 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
13748
13749 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
13750 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
13751 return nullptr;
13752
13753 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
13754 return nullptr;
13755
13756 return CNodeLo->getConstVal();
13757}
13758
13759 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
13760 MachineBasicBlock *BB) {
13761 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
13762
13763 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
13764 // Should the count have wrapped while it was being read, we need to try
13765 // again.
13766 // ...
13767 // read:
13768 // rdcycleh x3 # load high word of cycle
13769 // rdcycle x2 # load low word of cycle
13770 // rdcycleh x4 # load high word of cycle
13771 // bne x3, x4, read # check if high word reads match, otherwise try again
13772 // ...
13773
13774 MachineFunction &MF = *BB->getParent();
13775 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13776 MachineFunction::iterator It = ++BB->getIterator();
13777
13778 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
13779 MF.insert(It, LoopMBB);
13780
13781 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
13782 MF.insert(It, DoneMBB);
13783
13784 // Transfer the remainder of BB and its successor edges to DoneMBB.
13785 DoneMBB->splice(DoneMBB->begin(), BB,
13786 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13787 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
13788
13789 BB->addSuccessor(LoopMBB);
13790
13791 MachineRegisterInfo &RegInfo = MF.getRegInfo();
13792 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
13793 Register LoReg = MI.getOperand(0).getReg();
13794 Register HiReg = MI.getOperand(1).getReg();
13795 DebugLoc DL = MI.getDebugLoc();
13796
13797 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
13798 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
13799 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
13800 .addReg(RISCV::X0);
13801 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
13802 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
13803 .addReg(RISCV::X0);
13804 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
13805 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
13806 .addReg(RISCV::X0);
13807
13808 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
13809 .addReg(HiReg)
13810 .addReg(ReadAgainReg)
13811 .addMBB(LoopMBB);
13812
13813 LoopMBB->addSuccessor(LoopMBB);
13814 LoopMBB->addSuccessor(DoneMBB);
13815
13816 MI.eraseFromParent();
13817
13818 return DoneMBB;
13819}
13820
13821 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
13822 MachineBasicBlock *BB,
13823 const RISCVSubtarget &Subtarget) {
13824 assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
13825 MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
13826 "Unexpected instruction");
13827
13828 MachineFunction &MF = *BB->getParent();
13829 DebugLoc DL = MI.getDebugLoc();
13830 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
13831 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
13832 Register LoReg = MI.getOperand(0).getReg();
13833 Register HiReg = MI.getOperand(1).getReg();
13834 Register SrcReg = MI.getOperand(2).getReg();
13835
13836 const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
13837 ? &RISCV::GPRPF64RegClass
13838 : &RISCV::FPR64RegClass;
13839 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
13840
13841 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
13842 RI, Register());
13843 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
13844 MachineMemOperand *MMOLo =
13845 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
13846 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
13847 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
13848 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
13849 .addFrameIndex(FI)
13850 .addImm(0)
13851 .addMemOperand(MMOLo);
13852 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
13853 .addFrameIndex(FI)
13854 .addImm(4)
13855 .addMemOperand(MMOHi);
13856 MI.eraseFromParent(); // The pseudo instruction is gone now.
13857 return BB;
13858}
13859
13860 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
13861 MachineBasicBlock *BB,
13862 const RISCVSubtarget &Subtarget) {
13863 assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
13864 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
13865 "Unexpected instruction");
13866
13867 MachineFunction &MF = *BB->getParent();
13868 DebugLoc DL = MI.getDebugLoc();
13869 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
13870 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
13871 Register DstReg = MI.getOperand(0).getReg();
13872 Register LoReg = MI.getOperand(1).getReg();
13873 Register HiReg = MI.getOperand(2).getReg();
13874
13875 const TargetRegisterClass *DstRC =
13876 MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
13877 : &RISCV::FPR64RegClass;
13878 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
13879
13880 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
13881 MachineMemOperand *MMOLo =
13882 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
13883 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
13884 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
13885 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
13886 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
13887 .addFrameIndex(FI)
13888 .addImm(0)
13889 .addMemOperand(MMOLo);
13890 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
13891 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
13892 .addFrameIndex(FI)
13893 .addImm(4)
13894 .addMemOperand(MMOHi);
13895 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
13896 MI.eraseFromParent(); // The pseudo instruction is gone now.
13897 return BB;
13898}
13899
13900 static bool isSelectPseudo(MachineInstr &MI) {
13901 switch (MI.getOpcode()) {
13902 default:
13903 return false;
13904 case RISCV::Select_GPR_Using_CC_GPR:
13905 case RISCV::Select_FPR16_Using_CC_GPR:
13906 case RISCV::Select_FPR16INX_Using_CC_GPR:
13907 case RISCV::Select_FPR32_Using_CC_GPR:
13908 case RISCV::Select_FPR32INX_Using_CC_GPR:
13909 case RISCV::Select_FPR64_Using_CC_GPR:
13910 case RISCV::Select_FPR64INX_Using_CC_GPR:
13911 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
13912 return true;
13913 }
13914}
13915
13916 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
13917 unsigned RelOpcode, unsigned EqOpcode,
13918 const RISCVSubtarget &Subtarget) {
13919 DebugLoc DL = MI.getDebugLoc();
13920 Register DstReg = MI.getOperand(0).getReg();
13921 Register Src1Reg = MI.getOperand(1).getReg();
13922 Register Src2Reg = MI.getOperand(2).getReg();
13923 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
13924 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
13925 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
13926
13927 // Save the current FFLAGS.
13928 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
13929
13930 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
13931 .addReg(Src1Reg)
13932 .addReg(Src2Reg);
13933 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
13934 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
13935
13936 // Restore the FFLAGS.
13937 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
13938 .addReg(SavedFFlags, RegState::Kill);
13939
13940 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
13941 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
13942 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
13943 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
13944 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
13945 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
13946
13947 // Erase the pseudoinstruction.
13948 MI.eraseFromParent();
13949 return BB;
13950}
13951
13952static MachineBasicBlock *
13953 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
13954 MachineBasicBlock *ThisMBB,
13955 const RISCVSubtarget &Subtarget) {
13956 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
13957 // Without this, custom-inserter would have generated:
13958 //
13959 // A
13960 // | \
13961 // | B
13962 // | /
13963 // C
13964 // | \
13965 // | D
13966 // | /
13967 // E
13968 //
13969 // A: X = ...; Y = ...
13970 // B: empty
13971 // C: Z = PHI [X, A], [Y, B]
13972 // D: empty
13973 // E: PHI [X, C], [Z, D]
13974 //
13975 // If we lower both Select_FPRX_ in a single step, we can instead generate:
13976 //
13977 // A
13978 // | \
13979 // | C
13980 // | /|
13981 // |/ |
13982 // | |
13983 // | D
13984 // | /
13985 // E
13986 //
13987 // A: X = ...; Y = ...
13988 // D: empty
13989 // E: PHI [X, A], [X, C], [Y, D]
13990
13991 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
13992 const DebugLoc &DL = First.getDebugLoc();
13993 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
13994 MachineFunction *F = ThisMBB->getParent();
13995 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
13996 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
13997 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13998 MachineFunction::iterator It = ++ThisMBB->getIterator();
13999 F->insert(It, FirstMBB);
14000 F->insert(It, SecondMBB);
14001 F->insert(It, SinkMBB);
14002
14003 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
14004 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
14005 std::next(MachineBasicBlock::iterator(First)),
14006 ThisMBB->end());
14007 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
14008
14009 // Fallthrough block for ThisMBB.
14010 ThisMBB->addSuccessor(FirstMBB);
14011 // Fallthrough block for FirstMBB.
14012 FirstMBB->addSuccessor(SecondMBB);
14013 ThisMBB->addSuccessor(SinkMBB);
14014 FirstMBB->addSuccessor(SinkMBB);
14015 // This is fallthrough.
14016 SecondMBB->addSuccessor(SinkMBB);
14017
14018 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
14019 Register FLHS = First.getOperand(1).getReg();
14020 Register FRHS = First.getOperand(2).getReg();
14021 // Insert appropriate branch.
14022 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
14023 .addReg(FLHS)
14024 .addReg(FRHS)
14025 .addMBB(SinkMBB);
14026
14027 Register SLHS = Second.getOperand(1).getReg();
14028 Register SRHS = Second.getOperand(2).getReg();
14029 Register Op1Reg4 = First.getOperand(4).getReg();
14030 Register Op1Reg5 = First.getOperand(5).getReg();
14031
14032 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
14033 // Insert appropriate branch.
14034 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
14035 .addReg(SLHS)
14036 .addReg(SRHS)
14037 .addMBB(SinkMBB);
14038
14039 Register DestReg = Second.getOperand(0).getReg();
14040 Register Op2Reg4 = Second.getOperand(4).getReg();
14041 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
14042 .addReg(Op2Reg4)
14043 .addMBB(ThisMBB)
14044 .addReg(Op1Reg4)
14045 .addMBB(FirstMBB)
14046 .addReg(Op1Reg5)
14047 .addMBB(SecondMBB);
14048
14049 // Now remove the Select_FPRX_s.
14050 First.eraseFromParent();
14051 Second.eraseFromParent();
14052 return SinkMBB;
14053}
14054
14055 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
14056 MachineBasicBlock *BB,
14057 const RISCVSubtarget &Subtarget) {
14058 // To "insert" Select_* instructions, we actually have to insert the triangle
14059 // control-flow pattern. The incoming instructions know the destination vreg
14060 // to set, the condition code register to branch on, the true/false values to
14061 // select between, and the condcode to use to select the appropriate branch.
14062 //
14063 // We produce the following control flow:
14064 // HeadMBB
14065 // | \
14066 // | IfFalseMBB
14067 // | /
14068 // TailMBB
14069 //
14070 // When we find a sequence of selects we attempt to optimize their emission
14071 // by sharing the control flow. Currently we only handle cases where we have
14072 // multiple selects with the exact same condition (same LHS, RHS and CC).
14073 // The selects may be interleaved with other instructions if the other
14074 // instructions meet some requirements we deem safe:
14075 // - They are not pseudo instructions.
14076  //  - They are debug instructions, or otherwise:
14077 // - They do not have side-effects, do not access memory and their inputs do
14078 // not depend on the results of the select pseudo-instructions.
14079 // The TrueV/FalseV operands of the selects cannot depend on the result of
14080 // previous selects in the sequence.
14081 // These conditions could be further relaxed. See the X86 target for a
14082 // related approach and more information.
14083 //
14084 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
14085 // is checked here and handled by a separate function -
14086 // EmitLoweredCascadedSelect.
14087 Register LHS = MI.getOperand(1).getReg();
14088 Register RHS = MI.getOperand(2).getReg();
14089 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
14090
14091 SmallVector<MachineInstr *, 4> SelectDebugValues;
14092 SmallSet<Register, 4> SelectDests;
14093 SelectDests.insert(MI.getOperand(0).getReg());
14094
14095 MachineInstr *LastSelectPseudo = &MI;
14096 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
14097 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
14098 Next->getOpcode() == MI.getOpcode() &&
14099 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
14100 Next->getOperand(5).isKill()) {
14101 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
14102 }
14103
14104 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
14105 SequenceMBBI != E; ++SequenceMBBI) {
14106 if (SequenceMBBI->isDebugInstr())
14107 continue;
14108 if (isSelectPseudo(*SequenceMBBI)) {
14109 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
14110 SequenceMBBI->getOperand(2).getReg() != RHS ||
14111 SequenceMBBI->getOperand(3).getImm() != CC ||
14112 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
14113 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
14114 break;
14115 LastSelectPseudo = &*SequenceMBBI;
14116 SequenceMBBI->collectDebugValues(SelectDebugValues);
14117 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
14118 continue;
14119 }
14120 if (SequenceMBBI->hasUnmodeledSideEffects() ||
14121 SequenceMBBI->mayLoadOrStore() ||
14122 SequenceMBBI->usesCustomInsertionHook())
14123 break;
14124 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
14125 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
14126 }))
14127 break;
14128 }
14129
14130 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
14131 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14132 DebugLoc DL = MI.getDebugLoc();
14133   MachineFunction::iterator I = ++BB->getIterator();
14134
14135 MachineBasicBlock *HeadMBB = BB;
14136 MachineFunction *F = BB->getParent();
14137 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
14138 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
14139
14140 F->insert(I, IfFalseMBB);
14141 F->insert(I, TailMBB);
14142
14143 // Transfer debug instructions associated with the selects to TailMBB.
14144 for (MachineInstr *DebugInstr : SelectDebugValues) {
14145 TailMBB->push_back(DebugInstr->removeFromParent());
14146 }
14147
14148 // Move all instructions after the sequence to TailMBB.
14149 TailMBB->splice(TailMBB->end(), HeadMBB,
14150 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
14151 // Update machine-CFG edges by transferring all successors of the current
14152 // block to the new block which will contain the Phi nodes for the selects.
14153 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
14154 // Set the successors for HeadMBB.
14155 HeadMBB->addSuccessor(IfFalseMBB);
14156 HeadMBB->addSuccessor(TailMBB);
14157
14158 // Insert appropriate branch.
14159 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
14160 .addReg(LHS)
14161 .addReg(RHS)
14162 .addMBB(TailMBB);
14163
14164 // IfFalseMBB just falls through to TailMBB.
14165 IfFalseMBB->addSuccessor(TailMBB);
14166
14167 // Create PHIs for all of the select pseudo-instructions.
14168 auto SelectMBBI = MI.getIterator();
14169 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
14170 auto InsertionPoint = TailMBB->begin();
14171 while (SelectMBBI != SelectEnd) {
14172 auto Next = std::next(SelectMBBI);
14173 if (isSelectPseudo(*SelectMBBI)) {
14174 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
14175 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
14176 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
14177 .addReg(SelectMBBI->getOperand(4).getReg())
14178 .addMBB(HeadMBB)
14179 .addReg(SelectMBBI->getOperand(5).getReg())
14180 .addMBB(IfFalseMBB);
14181 SelectMBBI->eraseFromParent();
14182 }
14183 SelectMBBI = Next;
14184 }
14185
14186 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
14187 return TailMBB;
14188}
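// Illustrative sketch (assumed example, not from this file): two selects that
// share the exact same comparison can be emitted with a single branch and one
// IfFalseMBB, each becoming a PHI in TailMBB as described above.
static int sharedConditionSelectSketch(int A, int B, int C, int D, int X, int Y) {
  int R0 = (X < Y) ? A : B; // same LHS, RHS and condition code...
  int R1 = (X < Y) ? C : D; // ...so both selects can reuse one diamond
  return R0 ^ R1;
}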
14189
14190 static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
14191                                        unsigned Opcode) {
14192 DebugLoc DL = MI.getDebugLoc();
14193
14195
14197 Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14198
14199 assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
14200 unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
14201
14202 // Update FRM and save the old value.
14203 BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
14204 .addImm(MI.getOperand(FRMIdx).getImm());
14205
14206   // Emit a VFCVT with FRM == DYN.
14207 auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
14208
14209 for (unsigned I = 0; I < MI.getNumOperands(); I++)
14210 if (I != FRMIdx)
14211 MIB = MIB.add(MI.getOperand(I));
14212 else
14213 MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
14214
14215 MIB.add(MachineOperand::CreateReg(RISCV::FRM,
14216 /*IsDef*/ false,
14217 /*IsImp*/ true));
14218
14221
14222 // Restore FRM.
14223 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
14224 .addReg(SavedFRM, RegState::Kill);
14225
14226 // Erase the pseudoinstruction.
14227 MI.eraseFromParent();
14228 return BB;
14229}
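// Conceptual sketch (assumed, plain C++ rather than MIR): the expansion above
// follows a swap/convert/restore pattern around the FRM control register.
// 'Frm' is only a stand-in variable for the CSR, not the real register.
static int convertWithStaticRoundingSketch(int &Frm, int StaticRM, int Value) {
  int SavedFrm = Frm;  // SwapFRMImm: read the old rounding mode...
  Frm = StaticRM;      // ...and install the statically encoded one
  int Result = Value;  // stand-in for the VFCVT that now executes with frm=DYN
  Frm = SavedFrm;      // WriteFRM: restore the caller's rounding mode
  return Result;
}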
14230
14231 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
14232                                                     MachineBasicBlock *BB,
14233                                                     unsigned CVTXOpc,
14234                                                     unsigned CVTFOpc) {
14235 DebugLoc DL = MI.getDebugLoc();
14236
14238
14240 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14241
14242 // Save the old value of FFLAGS.
14243 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
14244
14245 assert(MI.getNumOperands() == 7);
14246
14247 // Emit a VFCVT_X_F
14248 const TargetRegisterInfo *TRI =
14250 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
14251 Register Tmp = MRI.createVirtualRegister(RC);
14252 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
14253 .add(MI.getOperand(1))
14254 .add(MI.getOperand(2))
14255 .add(MI.getOperand(3))
14256 .add(MachineOperand::CreateImm(7)) // frm = DYN
14257 .add(MI.getOperand(4))
14258 .add(MI.getOperand(5))
14259 .add(MI.getOperand(6))
14260 .add(MachineOperand::CreateReg(RISCV::FRM,
14261 /*IsDef*/ false,
14262 /*IsImp*/ true));
14263
14264 // Emit a VFCVT_F_X
14265 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
14266 .add(MI.getOperand(0))
14267 .add(MI.getOperand(1))
14268 .addReg(Tmp)
14269 .add(MI.getOperand(3))
14270 .add(MachineOperand::CreateImm(7)) // frm = DYN
14271 .add(MI.getOperand(4))
14272 .add(MI.getOperand(5))
14273 .add(MI.getOperand(6))
14274 .add(MachineOperand::CreateReg(RISCV::FRM,
14275 /*IsDef*/ false,
14276 /*IsImp*/ true));
14277
14278 // Restore FFLAGS.
14279 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
14280 .addReg(SavedFFLAGS, RegState::Kill);
14281
14282 // Erase the pseudoinstruction.
14283 MI.eraseFromParent();
14284 return BB;
14285}
14286
14287 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
14288                                      const RISCVSubtarget &Subtarget) {
14289 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
14290 const TargetRegisterClass *RC;
14291 switch (MI.getOpcode()) {
14292 default:
14293 llvm_unreachable("Unexpected opcode");
14294 case RISCV::PseudoFROUND_H:
14295 CmpOpc = RISCV::FLT_H;
14296 F2IOpc = RISCV::FCVT_W_H;
14297 I2FOpc = RISCV::FCVT_H_W;
14298 FSGNJOpc = RISCV::FSGNJ_H;
14299 FSGNJXOpc = RISCV::FSGNJX_H;
14300 RC = &RISCV::FPR16RegClass;
14301 break;
14302 case RISCV::PseudoFROUND_H_INX:
14303 CmpOpc = RISCV::FLT_H_INX;
14304 F2IOpc = RISCV::FCVT_W_H_INX;
14305 I2FOpc = RISCV::FCVT_H_W_INX;
14306 FSGNJOpc = RISCV::FSGNJ_H_INX;
14307 FSGNJXOpc = RISCV::FSGNJX_H_INX;
14308 RC = &RISCV::GPRF16RegClass;
14309 break;
14310 case RISCV::PseudoFROUND_S:
14311 CmpOpc = RISCV::FLT_S;
14312 F2IOpc = RISCV::FCVT_W_S;
14313 I2FOpc = RISCV::FCVT_S_W;
14314 FSGNJOpc = RISCV::FSGNJ_S;
14315 FSGNJXOpc = RISCV::FSGNJX_S;
14316 RC = &RISCV::FPR32RegClass;
14317 break;
14318 case RISCV::PseudoFROUND_S_INX:
14319 CmpOpc = RISCV::FLT_S_INX;
14320 F2IOpc = RISCV::FCVT_W_S_INX;
14321 I2FOpc = RISCV::FCVT_S_W_INX;
14322 FSGNJOpc = RISCV::FSGNJ_S_INX;
14323 FSGNJXOpc = RISCV::FSGNJX_S_INX;
14324 RC = &RISCV::GPRF32RegClass;
14325 break;
14326 case RISCV::PseudoFROUND_D:
14327 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
14328 CmpOpc = RISCV::FLT_D;
14329 F2IOpc = RISCV::FCVT_L_D;
14330 I2FOpc = RISCV::FCVT_D_L;
14331 FSGNJOpc = RISCV::FSGNJ_D;
14332 FSGNJXOpc = RISCV::FSGNJX_D;
14333 RC = &RISCV::FPR64RegClass;
14334 break;
14335 case RISCV::PseudoFROUND_D_INX:
14336 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
14337 CmpOpc = RISCV::FLT_D_INX;
14338 F2IOpc = RISCV::FCVT_L_D_INX;
14339 I2FOpc = RISCV::FCVT_D_L_INX;
14340 FSGNJOpc = RISCV::FSGNJ_D_INX;
14341 FSGNJXOpc = RISCV::FSGNJX_D_INX;
14342 RC = &RISCV::GPRRegClass;
14343 break;
14344 }
14345
14346 const BasicBlock *BB = MBB->getBasicBlock();
14347 DebugLoc DL = MI.getDebugLoc();
14349
14351 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
14352 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
14353
14354 F->insert(I, CvtMBB);
14355 F->insert(I, DoneMBB);
14356 // Move all instructions after the sequence to DoneMBB.
14357 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
14358 MBB->end());
14359 // Update machine-CFG edges by transferring all successors of the current
14360 // block to the new block which will contain the Phi nodes for the selects.
14361   DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
14362   // Set the successors for MBB.
14363 MBB->addSuccessor(CvtMBB);
14364 MBB->addSuccessor(DoneMBB);
14365
14366 Register DstReg = MI.getOperand(0).getReg();
14367 Register SrcReg = MI.getOperand(1).getReg();
14368 Register MaxReg = MI.getOperand(2).getReg();
14369 int64_t FRM = MI.getOperand(3).getImm();
14370
14371 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
14373
14374 Register FabsReg = MRI.createVirtualRegister(RC);
14375 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
14376
14377 // Compare the FP value to the max value.
14378 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14379 auto MIB =
14380 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
14383
14384 // Insert branch.
14385 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
14386 .addReg(CmpReg)
14387 .addReg(RISCV::X0)
14388 .addMBB(DoneMBB);
14389
14390 CvtMBB->addSuccessor(DoneMBB);
14391
14392 // Convert to integer.
14393 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14394 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
14397
14398 // Convert back to FP.
14399 Register I2FReg = MRI.createVirtualRegister(RC);
14400 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
14403
14404 // Restore the sign bit.
14405 Register CvtReg = MRI.createVirtualRegister(RC);
14406 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
14407
14408 // Merge the results.
14409 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
14410 .addReg(SrcReg)
14411 .addMBB(MBB)
14412 .addReg(CvtReg)
14413 .addMBB(CvtMBB);
14414
14415 MI.eraseFromParent();
14416 return DoneMBB;
14417}
14418
14419 MachineBasicBlock *
14420 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
14421                                                  MachineBasicBlock *BB) const {
14422 switch (MI.getOpcode()) {
14423 default:
14424 llvm_unreachable("Unexpected instr type to insert");
14425 case RISCV::ReadCycleWide:
14426 assert(!Subtarget.is64Bit() &&
14427          "ReadCycleWide is only to be used on riscv32");
14428 return emitReadCycleWidePseudo(MI, BB);
14429 case RISCV::Select_GPR_Using_CC_GPR:
14430 case RISCV::Select_FPR16_Using_CC_GPR:
14431 case RISCV::Select_FPR16INX_Using_CC_GPR:
14432 case RISCV::Select_FPR32_Using_CC_GPR:
14433 case RISCV::Select_FPR32INX_Using_CC_GPR:
14434 case RISCV::Select_FPR64_Using_CC_GPR:
14435 case RISCV::Select_FPR64INX_Using_CC_GPR:
14436 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
14437 return emitSelectPseudo(MI, BB, Subtarget);
14438 case RISCV::BuildPairF64Pseudo:
14439 case RISCV::BuildPairF64Pseudo_INX:
14440 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
14441 case RISCV::SplitF64Pseudo:
14442 case RISCV::SplitF64Pseudo_INX:
14443 return emitSplitF64Pseudo(MI, BB, Subtarget);
14444 case RISCV::PseudoQuietFLE_H:
14445 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
14446 case RISCV::PseudoQuietFLE_H_INX:
14447 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
14448 case RISCV::PseudoQuietFLT_H:
14449 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
14450 case RISCV::PseudoQuietFLT_H_INX:
14451 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
14452 case RISCV::PseudoQuietFLE_S:
14453 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
14454 case RISCV::PseudoQuietFLE_S_INX:
14455 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
14456 case RISCV::PseudoQuietFLT_S:
14457 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
14458 case RISCV::PseudoQuietFLT_S_INX:
14459 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
14460 case RISCV::PseudoQuietFLE_D:
14461 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
14462 case RISCV::PseudoQuietFLE_D_INX:
14463 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
14464 case RISCV::PseudoQuietFLE_D_IN32X:
14465 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
14466 Subtarget);
14467 case RISCV::PseudoQuietFLT_D:
14468 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
14469 case RISCV::PseudoQuietFLT_D_INX:
14470 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
14471 case RISCV::PseudoQuietFLT_D_IN32X:
14472 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
14473 Subtarget);
14474
14475#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \
14476 case RISCV::RMOpc##_##LMUL: \
14477 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \
14478 case RISCV::RMOpc##_##LMUL##_MASK: \
14479 return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
14480
14481#define PseudoVFCVT_RM_CASE(RMOpc, Opc) \
14482 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \
14483 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \
14484 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \
14485 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \
14486 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
14487
14488#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \
14489 PseudoVFCVT_RM_CASE(RMOpc, Opc) \
14490 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
14491
14492#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \
14493 PseudoVFCVT_RM_CASE(RMOpc, Opc) \
14494 PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
14495
14496 // VFCVT
14497 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
14498 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
14499 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
14500 PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
14501
14502 // VFWCVT
14503 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
14504 PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
14505
14506 // VFNCVT
14507 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
14508 PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
14509 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
14510 PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
14511
14512 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
14513 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
14514 RISCV::PseudoVFCVT_F_X_V_M1_MASK);
14515 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
14516 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
14517 RISCV::PseudoVFCVT_F_X_V_M2_MASK);
14518 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
14519 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
14520 RISCV::PseudoVFCVT_F_X_V_M4_MASK);
14521 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
14522 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
14523 RISCV::PseudoVFCVT_F_X_V_M8_MASK);
14524 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
14525 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
14526 RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
14527 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
14528 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
14529 RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
14530 case RISCV::PseudoFROUND_H:
14531 case RISCV::PseudoFROUND_H_INX:
14532 case RISCV::PseudoFROUND_S:
14533 case RISCV::PseudoFROUND_S_INX:
14534 case RISCV::PseudoFROUND_D:
14535 case RISCV::PseudoFROUND_D_INX:
14536 case RISCV::PseudoFROUND_D_IN32X:
14537 return emitFROUND(MI, BB, Subtarget);
14538 }
14539}
14540
14541 // Returns the index of the rounding mode immediate operand if any; otherwise
14542 // returns std::nullopt.
14543static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
14544 uint64_t TSFlags = MI.getDesc().TSFlags;
14545   if (!RISCVII::hasRoundModeOp(TSFlags))
14546     return std::nullopt;
14547
14548 // The operand order
14549 // -------------------------------------
14550 // | n-1 (if any) | n-2 | n-3 | n-4 |
14551 // | policy | sew | vl | rm |
14552 // -------------------------------------
14553 return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
14554}
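// Worked sketch of the index computation above (assumed standalone form, not
// part of the file): with a trailing policy operand an 8-operand pseudo keeps
// its rounding mode at index 8 - 1 - 3 == 4; without one, at n - 3.
static unsigned roundModeIdxSketch(unsigned NumExplicitOperands, bool HasPolicyOp) {
  return NumExplicitOperands - (HasPolicyOp ? 1 : 0) - 3;
}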
14555
14556 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
14557                                                         SDNode *Node) const {
14558 // Add FRM dependency to vector floating-point instructions with dynamic
14559 // rounding mode.
14560 if (auto RoundModeIdx = getRoundModeIdx(MI)) {
14561 unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
14562 if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) {
14563 MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false,
14564 /*isImp*/ true));
14565 }
14566 }
14567
14568 // Add FRM dependency to any instructions with dynamic rounding mode.
14569 unsigned Opc = MI.getOpcode();
14570 auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
14571 if (Idx < 0)
14572 return;
14573 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
14574 return;
14575 // If the instruction already reads FRM, don't add another read.
14576 if (MI.readsRegister(RISCV::FRM))
14577 return;
14578 MI.addOperand(
14579 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
14580}
14581
14582// Calling Convention Implementation.
14583// The expectations for frontend ABI lowering vary from target to target.
14584// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
14585// details, but this is a longer term goal. For now, we simply try to keep the
14586// role of the frontend as simple and well-defined as possible. The rules can
14587// be summarised as:
14588// * Never split up large scalar arguments. We handle them here.
14589// * If a hardfloat calling convention is being used, and the struct may be
14590// passed in a pair of registers (fp+fp, int+fp), and both registers are
14591// available, then pass as two separate arguments. If either the GPRs or FPRs
14592// are exhausted, then pass according to the rule below.
14593// * If a struct could never be passed in registers or directly in a stack
14594// slot (as it is larger than 2*XLEN and the floating point rules don't
14595// apply), then pass it using a pointer with the byval attribute.
14596// * If a struct is less than 2*XLEN, then coerce to either a two-element
14597// word-sized array or a 2*XLEN scalar (depending on alignment).
14598// * The frontend can determine whether a struct is returned by reference or
14599// not based on its size and fields. If it will be returned by reference, the
14600// frontend must modify the prototype so a pointer with the sret annotation is
14601// passed as the first argument. This is not necessary for large scalar
14602// returns.
14603// * Struct return values and varargs should be coerced to structs containing
14604// register-size fields in the same situations they would be for fixed
14605// arguments.
14606
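// Assumed frontend-level illustration of the rules above (not part of this
// file; the exact lowering is the frontend's responsibility): under the
// hard-float lp64d ABI a small fp+int struct can be split across an FPR and a
// GPR when both are free, while a struct larger than 2*XLEN with no FP fields
// is passed as a pointer carrying the byval attribute.
struct FpIntPairExample { double D; int I; }; // may travel as fa0 + a0
struct LargeBlobExample { long Words[4]; };   // > 2*XLEN on rv64 -> byval pointer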
14607static const MCPhysReg ArgGPRs[] = {
14608 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
14609 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
14610};
14611static const MCPhysReg ArgFPR16s[] = {
14612 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
14613 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
14614};
14615static const MCPhysReg ArgFPR32s[] = {
14616 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
14617 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
14618};
14619static const MCPhysReg ArgFPR64s[] = {
14620 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
14621 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
14622};
14623// This is an interim calling convention and it may be changed in the future.
14624static const MCPhysReg ArgVRs[] = {
14625 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
14626 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
14627 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
14628static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
14629 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
14630 RISCV::V20M2, RISCV::V22M2};
14631static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
14632 RISCV::V20M4};
14633static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
14634
14635// Pass a 2*XLEN argument that has been split into two XLEN values through
14636// registers or the stack as necessary.
14637static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
14638 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
14639 MVT ValVT2, MVT LocVT2,
14640 ISD::ArgFlagsTy ArgFlags2) {
14641 unsigned XLenInBytes = XLen / 8;
14642 if (Register Reg = State.AllocateReg(ArgGPRs)) {
14643 // At least one half can be passed via register.
14644 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
14645 VA1.getLocVT(), CCValAssign::Full));
14646 } else {
14647 // Both halves must be passed on the stack, with proper alignment.
14648 Align StackAlign =
14649 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
14650 State.addLoc(
14652 State.AllocateStack(XLenInBytes, StackAlign),
14653 VA1.getLocVT(), CCValAssign::Full));
14654 State.addLoc(CCValAssign::getMem(
14655 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
14656 LocVT2, CCValAssign::Full));
14657 return false;
14658 }
14659
14660 if (Register Reg = State.AllocateReg(ArgGPRs)) {
14661 // The second half can also be passed via register.
14662 State.addLoc(
14663 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
14664 } else {
14665 // The second half is passed via the stack, without additional alignment.
14666 State.addLoc(CCValAssign::getMem(
14667 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
14668 LocVT2, CCValAssign::Full));
14669 }
14670
14671 return false;
14672}
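// Assumed usage illustration: a scalar split into two XLEN halves, such as a
// 64-bit integer argument on rv32, reaches the helper above and lands in a
// register pair, in one register plus a stack slot, or entirely on the stack
// with the stricter of the two alignments.
static long long twoXLenArgSketch(long long V) { return V; } // rv32: e.g. a0/a1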
14673
14674static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
14675 std::optional<unsigned> FirstMaskArgument,
14676 CCState &State, const RISCVTargetLowering &TLI) {
14677 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
14678 if (RC == &RISCV::VRRegClass) {
14679 // Assign the first mask argument to V0.
14680 // This is an interim calling convention and it may be changed in the
14681 // future.
14682 if (FirstMaskArgument && ValNo == *FirstMaskArgument)
14683 return State.AllocateReg(RISCV::V0);
14684 return State.AllocateReg(ArgVRs);
14685 }
14686 if (RC == &RISCV::VRM2RegClass)
14687 return State.AllocateReg(ArgVRM2s);
14688 if (RC == &RISCV::VRM4RegClass)
14689 return State.AllocateReg(ArgVRM4s);
14690 if (RC == &RISCV::VRM8RegClass)
14691 return State.AllocateReg(ArgVRM8s);
14692 llvm_unreachable("Unhandled register class for ValueType");
14693}
14694
14695// Implements the RISC-V calling convention. Returns true upon failure.
14696bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
14697 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
14698 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
14699 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
14700 std::optional<unsigned> FirstMaskArgument) {
14701 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
14702 assert(XLen == 32 || XLen == 64);
14703 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
14704
14705 // Static chain parameter must not be passed in normal argument registers,
14706 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
14707 if (ArgFlags.isNest()) {
14708 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
14709 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
14710 return false;
14711 }
14712 }
14713
14714   // Any return value split into more than two values can't be returned
14715   // directly. Vectors are returned via the available vector registers.
14716 if (!LocVT.isVector() && IsRet && ValNo > 1)
14717 return true;
14718
14719 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
14720 // variadic argument, or if no F16/F32 argument registers are available.
14721 bool UseGPRForF16_F32 = true;
14722 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
14723 // variadic argument, or if no F64 argument registers are available.
14724 bool UseGPRForF64 = true;
14725
14726 switch (ABI) {
14727 default:
14728 llvm_unreachable("Unexpected ABI");
14730 case RISCVABI::ABI_LP64:
14731 break;
14734 UseGPRForF16_F32 = !IsFixed;
14735 break;
14738 UseGPRForF16_F32 = !IsFixed;
14739 UseGPRForF64 = !IsFixed;
14740 break;
14741 }
14742
14743 // FPR16, FPR32, and FPR64 alias each other.
14744 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
14745 UseGPRForF16_F32 = true;
14746 UseGPRForF64 = true;
14747 }
14748
14749 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
14750 // similar local variables rather than directly checking against the target
14751 // ABI.
14752
14753 if (UseGPRForF16_F32 &&
14754 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
14755 LocVT = XLenVT;
14756 LocInfo = CCValAssign::BCvt;
14757 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
14758 LocVT = MVT::i64;
14759 LocInfo = CCValAssign::BCvt;
14760 }
14761
14762 // If this is a variadic argument, the RISC-V calling convention requires
14763 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
14764 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
14765 // be used regardless of whether the original argument was split during
14766 // legalisation or not. The argument will not be passed by registers if the
14767 // original type is larger than 2*XLEN, so the register alignment rule does
14768 // not apply.
14769 unsigned TwoXLenInBytes = (2 * XLen) / 8;
14770 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
14771 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
14772 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
14773 // Skip 'odd' register if necessary.
14774 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
14775 State.AllocateReg(ArgGPRs);
14776 }
14777
14778 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
14779 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
14780 State.getPendingArgFlags();
14781
14782 assert(PendingLocs.size() == PendingArgFlags.size() &&
14783 "PendingLocs and PendingArgFlags out of sync");
14784
14785 // Handle passing f64 on RV32D with a soft float ABI or when floating point
14786 // registers are exhausted.
14787 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
14788 assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
14789 "Can't lower f64 if it is split");
14790 // Depending on available argument GPRS, f64 may be passed in a pair of
14791 // GPRs, split between a GPR and the stack, or passed completely on the
14792 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
14793 // cases.
14794 Register Reg = State.AllocateReg(ArgGPRs);
14795 LocVT = MVT::i32;
14796 if (!Reg) {
14797 unsigned StackOffset = State.AllocateStack(8, Align(8));
14798 State.addLoc(
14799 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
14800 return false;
14801 }
14802 if (!State.AllocateReg(ArgGPRs))
14803 State.AllocateStack(4, Align(4));
14804 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
14805 return false;
14806 }
14807
14808 // Fixed-length vectors are located in the corresponding scalable-vector
14809 // container types.
14810 if (ValVT.isFixedLengthVector())
14811 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
14812
14813 // Split arguments might be passed indirectly, so keep track of the pending
14814 // values. Split vectors are passed via a mix of registers and indirectly, so
14815 // treat them as we would any other argument.
14816 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
14817 LocVT = XLenVT;
14818 LocInfo = CCValAssign::Indirect;
14819 PendingLocs.push_back(
14820 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
14821 PendingArgFlags.push_back(ArgFlags);
14822 if (!ArgFlags.isSplitEnd()) {
14823 return false;
14824 }
14825 }
14826
14827 // If the split argument only had two elements, it should be passed directly
14828 // in registers or on the stack.
14829 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
14830 PendingLocs.size() <= 2) {
14831 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
14832 // Apply the normal calling convention rules to the first half of the
14833 // split argument.
14834 CCValAssign VA = PendingLocs[0];
14835 ISD::ArgFlagsTy AF = PendingArgFlags[0];
14836 PendingLocs.clear();
14837 PendingArgFlags.clear();
14838 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
14839 ArgFlags);
14840 }
14841
14842 // Allocate to a register if possible, or else a stack slot.
14843 Register Reg;
14844 unsigned StoreSizeBytes = XLen / 8;
14845 Align StackAlign = Align(XLen / 8);
14846
14847 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
14848 Reg = State.AllocateReg(ArgFPR16s);
14849 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
14850 Reg = State.AllocateReg(ArgFPR32s);
14851 else if (ValVT == MVT::f64 && !UseGPRForF64)
14852 Reg = State.AllocateReg(ArgFPR64s);
14853 else if (ValVT.isVector()) {
14854 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
14855 if (!Reg) {
14856 // For return values, the vector must be passed fully via registers or
14857 // via the stack.
14858 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
14859 // but we're using all of them.
14860 if (IsRet)
14861 return true;
14862 // Try using a GPR to pass the address
14863 if ((Reg = State.AllocateReg(ArgGPRs))) {
14864 LocVT = XLenVT;
14865 LocInfo = CCValAssign::Indirect;
14866 } else if (ValVT.isScalableVector()) {
14867 LocVT = XLenVT;
14868 LocInfo = CCValAssign::Indirect;
14869 } else {
14870 // Pass fixed-length vectors on the stack.
14871 LocVT = ValVT;
14872 StoreSizeBytes = ValVT.getStoreSize();
14873 // Align vectors to their element sizes, being careful for vXi1
14874 // vectors.
14875 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
14876 }
14877 }
14878 } else {
14879 Reg = State.AllocateReg(ArgGPRs);
14880 }
14881
14882 unsigned StackOffset =
14883 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
14884
14885 // If we reach this point and PendingLocs is non-empty, we must be at the
14886 // end of a split argument that must be passed indirectly.
14887 if (!PendingLocs.empty()) {
14888 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
14889 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
14890
14891 for (auto &It : PendingLocs) {
14892 if (Reg)
14893 It.convertToReg(Reg);
14894 else
14895 It.convertToMem(StackOffset);
14896 State.addLoc(It);
14897 }
14898 PendingLocs.clear();
14899 PendingArgFlags.clear();
14900 return false;
14901 }
14902
14903 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
14904 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
14905 "Expected an XLenVT or vector types at this stage");
14906
14907 if (Reg) {
14908 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
14909 return false;
14910 }
14911
14912 // When a scalar floating-point value is passed on the stack, no
14913 // bit-conversion is needed.
14914 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
14915 assert(!ValVT.isVector());
14916 LocVT = ValVT;
14917 LocInfo = CCValAssign::Full;
14918 }
14919 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
14920 return false;
14921}
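// Assumed illustration of the 'even'/'aligned' register rule handled in
// RISCV::CC_RISCV above: on rv32, an 8-byte variadic argument must start in an
// even-numbered GPR, so in the call below a3 is skipped and D travels in the
// a4/a5 pair. variadicSinkSketch is a hypothetical callee, not a real API.
int variadicSinkSketch(const char *Fmt, ...);
static int alignedVarargSketch(int A, int B, double D) {
  return variadicSinkSketch("sketch", A, B, D); // Fmt->a0, A->a1, B->a2, D->a4/a5
}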
14922
14923template <typename ArgTy>
14924static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
14925 for (const auto &ArgIdx : enumerate(Args)) {
14926 MVT ArgVT = ArgIdx.value().VT;
14927 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
14928 return ArgIdx.index();
14929 }
14930 return std::nullopt;
14931}
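// Assumed illustration (requires an RVV-intrinsic-capable compiler and
// <riscv_vector.h>): in a signature like the one below the scan above returns
// index 1, so only Mask0 is steered to V0 by allocateRVVReg while Mask1 falls
// back to the ordinary ArgVRs list.
#if defined(__riscv_v_intrinsic)
#include <riscv_vector.h>
void firstMaskArgSketch(vint32m1_t Data, vbool32_t Mask0, vbool32_t Mask1);
#endif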
14932
14933void RISCVTargetLowering::analyzeInputArgs(
14934 MachineFunction &MF, CCState &CCInfo,
14935 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
14936 RISCVCCAssignFn Fn) const {
14937 unsigned NumArgs = Ins.size();
14939
14940 std::optional<unsigned> FirstMaskArgument;
14941 if (Subtarget.hasVInstructions())
14942 FirstMaskArgument = preAssignMask(Ins);
14943
14944 for (unsigned i = 0; i != NumArgs; ++i) {
14945 MVT ArgVT = Ins[i].VT;
14946 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
14947
14948 Type *ArgTy = nullptr;
14949 if (IsRet)
14950 ArgTy = FType->getReturnType();
14951 else if (Ins[i].isOrigArg())
14952 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
14953
14955 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
14956 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
14957 FirstMaskArgument)) {
14958 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
14959 << ArgVT << '\n');
14960 llvm_unreachable(nullptr);
14961 }
14962 }
14963}
14964
14965void RISCVTargetLowering::analyzeOutputArgs(
14966 MachineFunction &MF, CCState &CCInfo,
14967 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
14968 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
14969 unsigned NumArgs = Outs.size();
14970
14971 std::optional<unsigned> FirstMaskArgument;
14972 if (Subtarget.hasVInstructions())
14973 FirstMaskArgument = preAssignMask(Outs);
14974
14975 for (unsigned i = 0; i != NumArgs; i++) {
14976 MVT ArgVT = Outs[i].VT;
14977 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
14978 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
14979
14981 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
14982 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
14983 FirstMaskArgument)) {
14984 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
14985 << ArgVT << "\n");
14986 llvm_unreachable(nullptr);
14987 }
14988 }
14989}
14990
14991// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
14992// values.
14993 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
14994                                    const CCValAssign &VA, const SDLoc &DL,
14995 const RISCVSubtarget &Subtarget) {
14996 switch (VA.getLocInfo()) {
14997 default:
14998 llvm_unreachable("Unexpected CCValAssign::LocInfo");
14999 case CCValAssign::Full:
15000     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
15001       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
15002 break;
15003 case CCValAssign::BCvt:
15004 if (VA.getLocVT().isInteger() &&
15005 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
15006 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
15007 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
15008 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
15009 else
15010 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
15011 break;
15012 }
15013 return Val;
15014}
15015
15016// The caller is responsible for loading the full value if the argument is
15017// passed with CCValAssign::Indirect.
15018 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
15019                                 const CCValAssign &VA, const SDLoc &DL,
15020 const ISD::InputArg &In,
15021 const RISCVTargetLowering &TLI) {
15024 EVT LocVT = VA.getLocVT();
15025 SDValue Val;
15026 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
15027 Register VReg = RegInfo.createVirtualRegister(RC);
15028 RegInfo.addLiveIn(VA.getLocReg(), VReg);
15029 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
15030
15031 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
15032 if (In.isOrigArg()) {
15033 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
15034 if (OrigArg->getType()->isIntegerTy()) {
15035 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
15036 // An input zero extended from i31 can also be considered sign extended.
15037 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
15038 (BitWidth < 32 && In.Flags.isZExt())) {
15040 RVFI->addSExt32Register(VReg);
15041 }
15042 }
15043 }
15044
15045   if (VA.getLocInfo() == CCValAssign::Indirect)
15046     return Val;
15047
15048 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
15049}
15050
15051 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
15052                                    const CCValAssign &VA, const SDLoc &DL,
15053 const RISCVSubtarget &Subtarget) {
15054 EVT LocVT = VA.getLocVT();
15055
15056 switch (VA.getLocInfo()) {
15057 default:
15058 llvm_unreachable("Unexpected CCValAssign::LocInfo");
15059 case CCValAssign::Full:
15060 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
15061 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
15062 break;
15063 case CCValAssign::BCvt:
15064 if (VA.getLocVT().isInteger() &&
15065 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
15066 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
15067 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
15068 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
15069 else
15070 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
15071 break;
15072 }
15073 return Val;
15074}
15075
15076// The caller is responsible for loading the full value if the argument is
15077// passed with CCValAssign::Indirect.
15078 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
15079                                 const CCValAssign &VA, const SDLoc &DL) {
15081 MachineFrameInfo &MFI = MF.getFrameInfo();
15082 EVT LocVT = VA.getLocVT();
15083 EVT ValVT = VA.getValVT();
15085 if (ValVT.isScalableVector()) {
15086     // When the value is a scalable vector, what is saved on the stack is a
15087     // pointer to the scalable vector value, so ValVT becomes the pointer type
15088     // rather than the scalable vector type.
15089 ValVT = LocVT;
15090 }
15091 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
15092 /*IsImmutable=*/true);
15093 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
15094 SDValue Val;
15095
15096 ISD::LoadExtType ExtType;
15097 switch (VA.getLocInfo()) {
15098 default:
15099 llvm_unreachable("Unexpected CCValAssign::LocInfo");
15100 case CCValAssign::Full:
15101   case CCValAssign::Indirect:
15102   case CCValAssign::BCvt:
15103 ExtType = ISD::NON_EXTLOAD;
15104 break;
15105 }
15106 Val = DAG.getExtLoad(
15107 ExtType, DL, LocVT, Chain, FIN,
15109 return Val;
15110}
15111
15112 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
15113                                        const CCValAssign &VA, const SDLoc &DL) {
15114 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
15115 "Unexpected VA");
15117 MachineFrameInfo &MFI = MF.getFrameInfo();
15119
15120 if (VA.isMemLoc()) {
15121 // f64 is passed on the stack.
15122 int FI =
15123 MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
15124 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
15125 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
15127 }
15128
15129 assert(VA.isRegLoc() && "Expected register VA assignment");
15130
15131 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
15132 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
15133 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
15134 SDValue Hi;
15135 if (VA.getLocReg() == RISCV::X17) {
15136 // Second half of f64 is passed on the stack.
15137 int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
15138 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
15139 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
15141 } else {
15142 // Second half of f64 is passed in another GPR.
15143 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
15144 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
15145 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
15146 }
15147 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
15148}
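// Assumed illustration: with an rv32 soft-float ABI a double argument arrives
// split across two GPRs, across a7 plus a stack word, or entirely on the
// stack; the helper above reassembles all three cases into an f64 value via
// BuildPairF64.
static double softFloatDoubleSketch(double X) { return X; } // e.g. X in a0/a1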
15149
15150 // FastCC gives less than a 1% performance improvement on some particular
15151 // benchmarks, but theoretically it may benefit other cases.
15152 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
15153                             unsigned ValNo, MVT ValVT, MVT LocVT,
15154 CCValAssign::LocInfo LocInfo,
15155 ISD::ArgFlagsTy ArgFlags, CCState &State,
15156 bool IsFixed, bool IsRet, Type *OrigTy,
15157 const RISCVTargetLowering &TLI,
15158 std::optional<unsigned> FirstMaskArgument) {
15159
15160 // X5 and X6 might be used for save-restore libcall.
15161 static const MCPhysReg GPRList[] = {
15162 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
15163 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
15164 RISCV::X29, RISCV::X30, RISCV::X31};
15165
15166 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
15167 if (unsigned Reg = State.AllocateReg(GPRList)) {
15168 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15169 return false;
15170 }
15171 }
15172
15173 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
15174
15175 if (LocVT == MVT::f16 &&
15176 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
15177 static const MCPhysReg FPR16List[] = {
15178 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
15179 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
15180 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
15181 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
15182 if (unsigned Reg = State.AllocateReg(FPR16List)) {
15183 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15184 return false;
15185 }
15186 }
15187
15188 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
15189 static const MCPhysReg FPR32List[] = {
15190 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
15191 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
15192 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
15193 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
15194 if (unsigned Reg = State.AllocateReg(FPR32List)) {
15195 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15196 return false;
15197 }
15198 }
15199
15200 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
15201 static const MCPhysReg FPR64List[] = {
15202 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
15203 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
15204 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
15205 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
15206 if (unsigned Reg = State.AllocateReg(FPR64List)) {
15207 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15208 return false;
15209 }
15210 }
15211
15212 // Check if there is an available GPR before hitting the stack.
15213 if ((LocVT == MVT::f16 &&
15214 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
15215 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
15216 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
15217 Subtarget.hasStdExtZdinx())) {
15218 if (unsigned Reg = State.AllocateReg(GPRList)) {
15219 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15220 return false;
15221 }
15222 }
15223
15224 if (LocVT == MVT::f16) {
15225 unsigned Offset2 = State.AllocateStack(2, Align(2));
15226 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
15227 return false;
15228 }
15229
15230 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
15231 unsigned Offset4 = State.AllocateStack(4, Align(4));
15232 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
15233 return false;
15234 }
15235
15236 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
15237 unsigned Offset5 = State.AllocateStack(8, Align(8));
15238 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
15239 return false;
15240 }
15241
15242 if (LocVT.isVector()) {
15243 if (unsigned Reg =
15244 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
15245 // Fixed-length vectors are located in the corresponding scalable-vector
15246 // container types.
15247 if (ValVT.isFixedLengthVector())
15248 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
15249 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15250 } else {
15251 // Try and pass the address via a "fast" GPR.
15252 if (unsigned GPRReg = State.AllocateReg(GPRList)) {
15253 LocInfo = CCValAssign::Indirect;
15254 LocVT = TLI.getSubtarget().getXLenVT();
15255 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
15256 } else if (ValVT.isFixedLengthVector()) {
15257 auto StackAlign =
15259 unsigned StackOffset =
15260 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
15261 State.addLoc(
15262 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
15263 } else {
15264 // Can't pass scalable vectors on the stack.
15265 return true;
15266 }
15267 }
15268
15269 return false;
15270 }
15271
15272 return true; // CC didn't match.
15273}
15274
15275bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
15276 CCValAssign::LocInfo LocInfo,
15277 ISD::ArgFlagsTy ArgFlags, CCState &State) {
15278 if (ArgFlags.isNest()) {
15280 "Attribute 'nest' is not supported in GHC calling convention");
15281 }
15282
15283 static const MCPhysReg GPRList[] = {
15284 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
15285 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
15286
15287 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
15288 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
15289 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
15290 if (unsigned Reg = State.AllocateReg(GPRList)) {
15291 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15292 return false;
15293 }
15294 }
15295
15296 const RISCVSubtarget &Subtarget =
15297 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
15298
15299 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
15300 // Pass in STG registers: F1, ..., F6
15301 // fs0 ... fs5
15302 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
15303 RISCV::F18_F, RISCV::F19_F,
15304 RISCV::F20_F, RISCV::F21_F};
15305 if (unsigned Reg = State.AllocateReg(FPR32List)) {
15306 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15307 return false;
15308 }
15309 }
15310
15311 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
15312 // Pass in STG registers: D1, ..., D6
15313 // fs6 ... fs11
15314 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
15315 RISCV::F24_D, RISCV::F25_D,
15316 RISCV::F26_D, RISCV::F27_D};
15317 if (unsigned Reg = State.AllocateReg(FPR64List)) {
15318 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15319 return false;
15320 }
15321 }
15322
15323 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
15324 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
15325 Subtarget.is64Bit())) {
15326 if (unsigned Reg = State.AllocateReg(GPRList)) {
15327 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15328 return false;
15329 }
15330 }
15331
15332 report_fatal_error("No registers left in GHC calling convention");
15333 return true;
15334}
15335
15336// Transform physical registers into virtual registers.
15337 SDValue RISCVTargetLowering::LowerFormalArguments(
15338     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
15339 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
15340 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
15341
15343
15344 switch (CallConv) {
15345 default:
15346 report_fatal_error("Unsupported calling convention");
15347 case CallingConv::C:
15348 case CallingConv::Fast:
15349 break;
15350 case CallingConv::GHC:
15351 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
15352 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
15353 "(Zdinx/D) instruction set extensions");
15354 }
15355
15356 const Function &Func = MF.getFunction();
15357 if (Func.hasFnAttribute("interrupt")) {
15358 if (!Func.arg_empty())
15360 "Functions with the interrupt attribute cannot have arguments!");
15361
15362 StringRef Kind =
15363 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
15364
15365 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
15367 "Function interrupt attribute argument not supported!");
15368 }
15369
15370 EVT PtrVT = getPointerTy(DAG.getDataLayout());
15371 MVT XLenVT = Subtarget.getXLenVT();
15372 unsigned XLenInBytes = Subtarget.getXLen() / 8;
15373   // Used with varargs to accumulate store chains.
15374 std::vector<SDValue> OutChains;
15375
15376 // Assign locations to all of the incoming arguments.
15378 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
15379
15380 if (CallConv == CallingConv::GHC)
15381     CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
15382   else
15383 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
15384                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
15385                                                    : RISCV::CC_RISCV);
15386
15387 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
15388 CCValAssign &VA = ArgLocs[i];
15389 SDValue ArgValue;
15390 // Passing f64 on RV32D with a soft float ABI must be handled as a special
15391 // case.
15392 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
15393 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
15394 else if (VA.isRegLoc())
15395 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
15396 else
15397 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
15398
15399 if (VA.getLocInfo() == CCValAssign::Indirect) {
15400 // If the original argument was split and passed by reference (e.g. i128
15401 // on RV32), we need to load all parts of it here (using the same
15402 // address). Vectors may be partly split to registers and partly to the
15403 // stack, in which case the base address is partly offset and subsequent
15404 // stores are relative to that.
15405 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
15407 unsigned ArgIndex = Ins[i].OrigArgIndex;
15408 unsigned ArgPartOffset = Ins[i].PartOffset;
15409 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
15410 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
15411 CCValAssign &PartVA = ArgLocs[i + 1];
15412 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
15413 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
15414 if (PartVA.getValVT().isScalableVector())
15415 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
15416 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
15417 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
15419 ++i;
15420 }
15421 continue;
15422 }
15423 InVals.push_back(ArgValue);
15424 }
15425
15426 if (any_of(ArgLocs,
15427 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
15429
15430 if (IsVarArg) {
15432 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
15433 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
15434 MachineFrameInfo &MFI = MF.getFrameInfo();
15435 MachineRegisterInfo &RegInfo = MF.getRegInfo();
15437
15438 // Offset of the first variable argument from stack pointer, and size of
15439 // the vararg save area. For now, the varargs save area is either zero or
15440 // large enough to hold a0-a7.
15441 int VaArgOffset, VarArgsSaveSize;
15442
15443 // If all registers are allocated, then all varargs must be passed on the
15444 // stack and we don't need to save any argregs.
15445 if (ArgRegs.size() == Idx) {
15446 VaArgOffset = CCInfo.getStackSize();
15447 VarArgsSaveSize = 0;
15448 } else {
15449 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
15450 VaArgOffset = -VarArgsSaveSize;
15451 }
15452
15453 // Record the frame index of the first variable argument
15454 // which is a value necessary to VASTART.
15455 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
15456 RVFI->setVarArgsFrameIndex(FI);
15457
15458 // If saving an odd number of registers then create an extra stack slot to
15459 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
15460     // offsets to even-numbered registers remain 2*XLEN-aligned.
15461 if (Idx % 2) {
15462 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
15463 VarArgsSaveSize += XLenInBytes;
15464 }
15465
15466 // Copy the integer registers that may have been used for passing varargs
15467 // to the vararg save area.
15468 for (unsigned I = Idx; I < ArgRegs.size();
15469 ++I, VaArgOffset += XLenInBytes) {
15470 const Register Reg = RegInfo.createVirtualRegister(RC);
15471 RegInfo.addLiveIn(ArgRegs[I], Reg);
15472 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
15473 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
15474 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
15475 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
15477 cast<StoreSDNode>(Store.getNode())
15478 ->getMemOperand()
15479 ->setValue((Value *)nullptr);
15480 OutChains.push_back(Store);
15481 }
15482 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
15483 }
15484
15485 // All stores are grouped in one node to allow the matching between
15486 // the size of Ins and InVals. This only happens for vararg functions.
15487 if (!OutChains.empty()) {
15488 OutChains.push_back(Chain);
15489 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
15490 }
15491
15492 return Chain;
15493}
15494
15495/// isEligibleForTailCallOptimization - Check whether the call is eligible
15496/// for tail call optimization.
15497/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
15498bool RISCVTargetLowering::isEligibleForTailCallOptimization(
15499 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
15500 const SmallVector<CCValAssign, 16> &ArgLocs) const {
15501
15502 auto CalleeCC = CLI.CallConv;
15503 auto &Outs = CLI.Outs;
15504 auto &Caller = MF.getFunction();
15505 auto CallerCC = Caller.getCallingConv();
15506
15507 // Exception-handling functions need a special set of instructions to
15508 // indicate a return to the hardware. Tail-calling another function would
15509 // probably break this.
15510 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
15511 // should be expanded as new function attributes are introduced.
15512 if (Caller.hasFnAttribute("interrupt"))
15513 return false;
15514
15515 // Do not tail call opt if the stack is used to pass parameters.
15516 if (CCInfo.getStackSize() != 0)
15517 return false;
15518
15519 // Do not tail call opt if any parameters need to be passed indirectly.
15520 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
15521 // passed indirectly. So the address of the value will be passed in a
15522 // register, or if not available, then the address is put on the stack. In
15523 // order to pass indirectly, space on the stack often needs to be allocated
15524   // in order to store the value. In this case the CCInfo.getStackSize() != 0
15525   // check is not enough and we need to check if any CCValAssign ArgLocs are
15526   // passed CCValAssign::Indirect.
15527 for (auto &VA : ArgLocs)
15528 if (VA.getLocInfo() == CCValAssign::Indirect)
15529 return false;
15530
15531 // Do not tail call opt if either caller or callee uses struct return
15532 // semantics.
15533 auto IsCallerStructRet = Caller.hasStructRetAttr();
15534 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
15535 if (IsCallerStructRet || IsCalleeStructRet)
15536 return false;
15537
15538 // The callee has to preserve all registers the caller needs to preserve.
15539 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
15540 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
15541 if (CalleeCC != CallerCC) {
15542 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
15543 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
15544 return false;
15545 }
15546
15547 // Byval parameters hand the function a pointer directly into the stack area
15548 // we want to reuse during a tail call. Working around this *is* possible
15549 // but less efficient and uglier in LowerCall.
15550 for (auto &Arg : Outs)
15551 if (Arg.Flags.isByVal())
15552 return false;
15553
15554 return true;
15555}
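// Illustrative examples (hypothetical C, not from the upstream code) of calls
// the checks above accept or reject, assuming matching calling conventions:
//   long id(long x);
//   long ok(long x) { return id(x + 1); }         // everything in a0: eligible
//   struct Big { long a[8]; };
//   struct Big makeBig(void);
//   struct Big wrap(void) { return makeBig(); }   // struct return (sret): rejected
//   long useBig(struct Big b);
//   long pass(struct Big b) { return useBig(b); } // the large aggregate ends up
//       // byval or CCValAssign::Indirect (or needs stack bytes), so rejected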
15556
15557 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
15558 return DAG.getDataLayout().getPrefTypeAlign(
15559 VT.getTypeForEVT(*DAG.getContext()));
15560}
15561
15562// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
15563// and output parameter nodes.
15564 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
15565 SmallVectorImpl<SDValue> &InVals) const {
15566 SelectionDAG &DAG = CLI.DAG;
15567 SDLoc &DL = CLI.DL;
15568 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
15569 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
15570 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
15571 SDValue Chain = CLI.Chain;
15572 SDValue Callee = CLI.Callee;
15573 bool &IsTailCall = CLI.IsTailCall;
15574 CallingConv::ID CallConv = CLI.CallConv;
15575 bool IsVarArg = CLI.IsVarArg;
15576 EVT PtrVT = getPointerTy(DAG.getDataLayout());
15577 MVT XLenVT = Subtarget.getXLenVT();
15578
15579 MachineFunction &MF = DAG.getMachineFunction();
15580
15581 // Analyze the operands of the call, assigning locations to each operand.
15582 SmallVector<CCValAssign, 16> ArgLocs;
15583 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
15584
15585 if (CallConv == CallingConv::GHC)
15586 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
15587 else
15588 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
15589 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
15590 : RISCV::CC_RISCV);
15591
15592 // Check if it's really possible to do a tail call.
15593 if (IsTailCall)
15594 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
15595
15596 if (IsTailCall)
15597 ++NumTailCalls;
15598 else if (CLI.CB && CLI.CB->isMustTailCall())
15599 report_fatal_error("failed to perform tail call elimination on a call "
15600 "site marked musttail");
15601
15602 // Get a count of how many bytes are to be pushed on the stack.
15603 unsigned NumBytes = ArgCCInfo.getStackSize();
15604
15605 // Create local copies for byval args
15606 SmallVector<SDValue, 8> ByValArgs;
15607 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
15608 ISD::ArgFlagsTy Flags = Outs[i].Flags;
15609 if (!Flags.isByVal())
15610 continue;
15611
15612 SDValue Arg = OutVals[i];
15613 unsigned Size = Flags.getByValSize();
15614 Align Alignment = Flags.getNonZeroByValAlign();
15615
15616 int FI =
15617 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
15618 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
15619 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
15620
15621 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
15622 /*IsVolatile=*/false,
15623 /*AlwaysInline=*/false, IsTailCall,
15624 MachinePointerInfo(), MachinePointerInfo());
15625 ByValArgs.push_back(FIPtr);
15626 }
15627
15628 if (!IsTailCall)
15629 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
15630
15631 // Copy argument values to their designated locations.
15632 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
15633 SmallVector<SDValue, 8> MemOpChains;
15634 SDValue StackPtr;
15635 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
15636 CCValAssign &VA = ArgLocs[i];
15637 SDValue ArgValue = OutVals[i];
15638 ISD::ArgFlagsTy Flags = Outs[i].Flags;
15639
15640 // Handle passing f64 on RV32D with a soft float ABI as a special case.
15641 bool IsF64OnRV32DSoftABI =
15642 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
15643 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
15644 SDValue SplitF64 = DAG.getNode(
15645 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
15646 SDValue Lo = SplitF64.getValue(0);
15647 SDValue Hi = SplitF64.getValue(1);
15648
15649 Register RegLo = VA.getLocReg();
15650 RegsToPass.push_back(std::make_pair(RegLo, Lo));
15651
15652 if (RegLo == RISCV::X17) {
15653 // Second half of f64 is passed on the stack.
15654 // Work out the address of the stack slot.
15655 if (!StackPtr.getNode())
15656 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
15657 // Emit the store.
15658 MemOpChains.push_back(
15659 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
15660 } else {
15661 // Second half of f64 is passed in another GPR.
15662 assert(RegLo < RISCV::X31 && "Invalid register pair");
15663 Register RegHigh = RegLo + 1;
15664 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
15665 }
15666 continue;
15667 }
15668
15669 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
15670 // as any other MemLoc.
15671
15672 // Promote the value if needed.
15673 // For now, only handle fully promoted and indirect arguments.
15674 if (VA.getLocInfo() == CCValAssign::Indirect) {
15675 // Store the argument in a stack slot and pass its address.
15676 Align StackAlign =
15677 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
15678 getPrefTypeAlign(ArgValue.getValueType(), DAG));
15679 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
15680 // If the original argument was split (e.g. i128), we need
15681 // to store the required parts of it here (and pass just one address).
15682 // Vectors may be partly split to registers and partly to the stack, in
15683 // which case the base address is partly offset and subsequent stores are
15684 // relative to that.
15685 unsigned ArgIndex = Outs[i].OrigArgIndex;
15686 unsigned ArgPartOffset = Outs[i].PartOffset;
15687 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
15688 // Calculate the total size to store. We don't have access to what we're
15689 // actually storing other than performing the loop and collecting the
15690 // info.
15691 SmallVector<std::pair<SDValue, SDValue>> Parts;
15692 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
15693 SDValue PartValue = OutVals[i + 1];
15694 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
15695 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
15696 EVT PartVT = PartValue.getValueType();
15697 if (PartVT.isScalableVector())
15698 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
15699 StoredSize += PartVT.getStoreSize();
15700 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
15701 Parts.push_back(std::make_pair(PartValue, Offset));
15702 ++i;
15703 }
15704 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
15705 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
15706 MemOpChains.push_back(
15707 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
15708 MachinePointerInfo::getFixedStack(MF, FI)));
15709 for (const auto &Part : Parts) {
15710 SDValue PartValue = Part.first;
15711 SDValue PartOffset = Part.second;
15712 SDValue Address =
15713 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
15714 MemOpChains.push_back(
15715 DAG.getStore(Chain, DL, PartValue, Address,
15716 MachinePointerInfo::getFixedStack(MF, FI)));
15717 }
15718 ArgValue = SpillSlot;
15719 } else {
15720 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
15721 }
15722
15723 // Use local copy if it is a byval arg.
15724 if (Flags.isByVal())
15725 ArgValue = ByValArgs[j++];
15726
15727 if (VA.isRegLoc()) {
15728 // Queue up the argument copies and emit them at the end.
15729 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
15730 } else {
15731 assert(VA.isMemLoc() && "Argument not register or memory");
15732 assert(!IsTailCall && "Tail call not allowed if stack is used "
15733 "for passing parameters");
15734
15735 // Work out the address of the stack slot.
15736 if (!StackPtr.getNode())
15737 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
15738 SDValue Address =
15739 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
15740 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
15741
15742 // Emit the store.
15743 MemOpChains.push_back(
15744 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
15745 }
15746 }
15747
15748 // Join the stores, which are independent of one another.
15749 if (!MemOpChains.empty())
15750 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
15751
15752 SDValue Glue;
15753
15754 // Build a sequence of copy-to-reg nodes, chained and glued together.
15755 for (auto &Reg : RegsToPass) {
15756 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
15757 Glue = Chain.getValue(1);
15758 }
15759
15760 // Validate that none of the argument registers have been marked as
15761 // reserved, if so report an error. Do the same for the return address if this
15762 // is not a tailcall.
15763 validateCCReservedRegs(RegsToPass, MF);
15764 if (!IsTailCall &&
15765 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
15766 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15767 MF.getFunction(),
15768 "Return address register required, but has been reserved."});
15769
15770 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
15771 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
15772 // split it and then direct call can be matched by PseudoCALL.
15773 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
15774 const GlobalValue *GV = S->getGlobal();
15775
15776 unsigned OpFlags = RISCVII::MO_CALL;
15777 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
15778 OpFlags = RISCVII::MO_PLT;
15779
15780 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
15781 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
15782 unsigned OpFlags = RISCVII::MO_CALL;
15783
15784 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
15785 nullptr))
15786 OpFlags = RISCVII::MO_PLT;
15787
15788 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
15789 }
15790
15791 // The first call operand is the chain and the second is the target address.
15792 SmallVector<SDValue, 8> Ops;
15793 Ops.push_back(Chain);
15794 Ops.push_back(Callee);
15795
15796 // Add argument registers to the end of the list so that they are
15797 // known live into the call.
15798 for (auto &Reg : RegsToPass)
15799 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
15800
15801 if (!IsTailCall) {
15802 // Add a register mask operand representing the call-preserved registers.
15803 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
15804 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
15805 assert(Mask && "Missing call preserved mask for calling convention");
15806 Ops.push_back(DAG.getRegisterMask(Mask));
15807 }
15808
15809 // Glue the call to the argument copies, if any.
15810 if (Glue.getNode())
15811 Ops.push_back(Glue);
15812
15813 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
15814 "Unexpected CFI type for a direct call");
15815
15816 // Emit the call.
15817 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
15818
15819 if (IsTailCall) {
15820 MF.getFrameInfo().setHasTailCall();
15821 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
15822 if (CLI.CFIType)
15823 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
15824 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
15825 return Ret;
15826 }
15827
15828 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
15829 if (CLI.CFIType)
15830 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
15831 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
15832 Glue = Chain.getValue(1);
15833
15834 // Mark the end of the call, which is glued to the call itself.
15835 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
15836 Glue = Chain.getValue(1);
15837
15838 // Assign locations to each value returned by this call.
15839 SmallVector<CCValAssign, 16> RVLocs;
15840 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
15841 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
15842
15843 // Copy all of the result registers out of their specified physreg.
15844 for (auto &VA : RVLocs) {
15845 // Copy the value out
15846 SDValue RetValue =
15847 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
15848 // Glue the RetValue to the end of the call sequence
15849 Chain = RetValue.getValue(1);
15850 Glue = RetValue.getValue(2);
15851
15852 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
15853 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
15854 SDValue RetValue2 =
15855 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
15856 Chain = RetValue2.getValue(1);
15857 Glue = RetValue2.getValue(2);
15858 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
15859 RetValue2);
15860 }
15861
15862 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
15863
15864 InVals.push_back(RetValue);
15865 }
15866
15867 return Chain;
15868}
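// Illustrative sketch (not part of the upstream code) of the f64-on-RV32
// soft-float special case handled above: for
//   double f(double);
//   double g(void) { return f(1.5); }
// built with -mabi=ilp32 on a core that still has the D extension, the f64
// argument is split with RISCVISD::SplitF64 into two i32 halves; the low half
// goes into one argument GPR and the high half into the next one, and if the
// low half lands in a7 (X17) the high half is stored to the outgoing stack
// area instead. The f64 return value travels back as two GPRs and is rebuilt
// with RISCVISD::BuildPairF64.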
15869
15870 bool RISCVTargetLowering::CanLowerReturn(
15871 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
15872 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
15873 SmallVector<CCValAssign, 16> RVLocs;
15874 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
15875
15876 std::optional<unsigned> FirstMaskArgument;
15877 if (Subtarget.hasVInstructions())
15878 FirstMaskArgument = preAssignMask(Outs);
15879
15880 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
15881 MVT VT = Outs[i].VT;
15882 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
15883 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
15884 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
15885 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
15886 *this, FirstMaskArgument))
15887 return false;
15888 }
15889 return true;
15890}
15891
15892SDValue
15893RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
15894 bool IsVarArg,
15895 const SmallVectorImpl<ISD::OutputArg> &Outs,
15896 const SmallVectorImpl<SDValue> &OutVals,
15897 const SDLoc &DL, SelectionDAG &DAG) const {
15898 MachineFunction &MF = DAG.getMachineFunction();
15899 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
15900
15901 // Stores the assignment of the return value to a location.
15902 SmallVector<CCValAssign, 16> RVLocs;
15903
15904 // Info about the registers and stack slot.
15905 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
15906 *DAG.getContext());
15907
15908 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
15909 nullptr, RISCV::CC_RISCV);
15910
15911 if (CallConv == CallingConv::GHC && !RVLocs.empty())
15912 report_fatal_error("GHC functions return void only");
15913
15914 SDValue Glue;
15915 SmallVector<SDValue, 4> RetOps(1, Chain);
15916
15917 // Copy the result values into the output registers.
15918 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
15919 SDValue Val = OutVals[i];
15920 CCValAssign &VA = RVLocs[i];
15921 assert(VA.isRegLoc() && "Can only return in registers!");
15922
15923 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
15924 // Handle returning f64 on RV32D with a soft float ABI.
15925 assert(VA.isRegLoc() && "Expected return via registers");
15926 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
15927 DAG.getVTList(MVT::i32, MVT::i32), Val);
15928 SDValue Lo = SplitF64.getValue(0);
15929 SDValue Hi = SplitF64.getValue(1);
15930 Register RegLo = VA.getLocReg();
15931 assert(RegLo < RISCV::X31 && "Invalid register pair");
15932 Register RegHi = RegLo + 1;
15933
15934 if (STI.isRegisterReservedByUser(RegLo) ||
15935 STI.isRegisterReservedByUser(RegHi))
15936 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15937 MF.getFunction(),
15938 "Return value register required, but has been reserved."});
15939
15940 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
15941 Glue = Chain.getValue(1);
15942 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
15943 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
15944 Glue = Chain.getValue(1);
15945 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
15946 } else {
15947 // Handle a 'normal' return.
15948 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
15949 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
15950
15951 if (STI.isRegisterReservedByUser(VA.getLocReg()))
15952 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15953 MF.getFunction(),
15954 "Return value register required, but has been reserved."});
15955
15956 // Guarantee that all emitted copies are stuck together.
15957 Glue = Chain.getValue(1);
15958 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
15959 }
15960 }
15961
15962 RetOps[0] = Chain; // Update chain.
15963
15964 // Add the glue node if we have it.
15965 if (Glue.getNode()) {
15966 RetOps.push_back(Glue);
15967 }
15968
15969 if (any_of(RVLocs,
15970 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
15971 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
15972
15973 unsigned RetOpc = RISCVISD::RET_GLUE;
15974 // Interrupt service routines use different return instructions.
15975 const Function &Func = DAG.getMachineFunction().getFunction();
15976 if (Func.hasFnAttribute("interrupt")) {
15977 if (!Func.getReturnType()->isVoidTy())
15979 "Functions with the interrupt attribute must have void return type!");
15980
15981 MachineFunction &MF = DAG.getMachineFunction();
15982 StringRef Kind =
15983 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
15984
15985 if (Kind == "supervisor")
15986 RetOpc = RISCVISD::SRET_GLUE;
15987 else
15988 RetOpc = RISCVISD::MRET_GLUE;
15989 }
15990
15991 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
15992}
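// Illustrative example (hypothetical C, not from the upstream code) of the
// interrupt-return selection above:
//   __attribute__((interrupt("supervisor"))) void s_handler(void) { /* ... */ }
//   __attribute__((interrupt("machine")))    void m_handler(void) { /* ... */ }
// s_handler returns through RISCVISD::SRET_GLUE (sret) and m_handler through
// RISCVISD::MRET_GLUE (mret); a handler declared with a non-void return type
// hits the fatal error above instead.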
15993
15994void RISCVTargetLowering::validateCCReservedRegs(
15995 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
15996 MachineFunction &MF) const {
15997 const Function &F = MF.getFunction();
15998 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
15999
16000 if (llvm::any_of(Regs, [&STI](auto Reg) {
16001 return STI.isRegisterReservedByUser(Reg.first);
16002 }))
16003 F.getContext().diagnose(DiagnosticInfoUnsupported{
16004 F, "Argument register required, but has been reserved."});
16005}
16006
16007// Check if the result of the node is only used as a return value, as
16008// otherwise we can't perform a tail-call.
16009 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
16010 if (N->getNumValues() != 1)
16011 return false;
16012 if (!N->hasNUsesOfValue(1, 0))
16013 return false;
16014
16015 SDNode *Copy = *N->use_begin();
16016
16017 if (Copy->getOpcode() == ISD::BITCAST) {
16018 return isUsedByReturnOnly(Copy, Chain);
16019 }
16020
16021 // TODO: Handle additional opcodes in order to support tail-calling libcalls
16022 // with soft float ABIs.
16023 if (Copy->getOpcode() != ISD::CopyToReg) {
16024 return false;
16025 }
16026
16027 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
16028 // isn't safe to perform a tail call.
16029 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
16030 return false;
16031
16032 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
16033 bool HasRet = false;
16034 for (SDNode *Node : Copy->uses()) {
16035 if (Node->getOpcode() != RISCVISD::RET_GLUE)
16036 return false;
16037 HasRet = true;
16038 }
16039 if (!HasRet)
16040 return false;
16041
16042 Chain = Copy->getOperand(0);
16043 return true;
16044}
16045
16046 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16047 return CI->isTailCall();
16048}
16049
16050const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
16051#define NODE_NAME_CASE(NODE) \
16052 case RISCVISD::NODE: \
16053 return "RISCVISD::" #NODE;
16054 // clang-format off
16055 switch ((RISCVISD::NodeType)Opcode) {
16056 case RISCVISD::FIRST_NUMBER:
16057 break;
16058 NODE_NAME_CASE(RET_GLUE)
16059 NODE_NAME_CASE(SRET_GLUE)
16060 NODE_NAME_CASE(MRET_GLUE)
16061 NODE_NAME_CASE(CALL)
16062 NODE_NAME_CASE(SELECT_CC)
16063 NODE_NAME_CASE(BR_CC)
16064 NODE_NAME_CASE(BuildPairF64)
16065 NODE_NAME_CASE(SplitF64)
16066 NODE_NAME_CASE(TAIL)
16067 NODE_NAME_CASE(ADD_LO)
16068 NODE_NAME_CASE(HI)
16069 NODE_NAME_CASE(LLA)
16070 NODE_NAME_CASE(LGA)
16071 NODE_NAME_CASE(ADD_TPREL)
16072 NODE_NAME_CASE(LA_TLS_IE)
16073 NODE_NAME_CASE(LA_TLS_GD)
16074 NODE_NAME_CASE(MULHSU)
16075 NODE_NAME_CASE(SLLW)
16076 NODE_NAME_CASE(SRAW)
16077 NODE_NAME_CASE(SRLW)
16078 NODE_NAME_CASE(DIVW)
16079 NODE_NAME_CASE(DIVUW)
16080 NODE_NAME_CASE(REMUW)
16081 NODE_NAME_CASE(ROLW)
16082 NODE_NAME_CASE(RORW)
16083 NODE_NAME_CASE(CLZW)
16084 NODE_NAME_CASE(CTZW)
16085 NODE_NAME_CASE(ABSW)
16086 NODE_NAME_CASE(FMV_H_X)
16087 NODE_NAME_CASE(FMV_X_ANYEXTH)
16088 NODE_NAME_CASE(FMV_X_SIGNEXTH)
16089 NODE_NAME_CASE(FMV_W_X_RV64)
16090 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
16091 NODE_NAME_CASE(FCVT_X)
16092 NODE_NAME_CASE(FCVT_XU)
16093 NODE_NAME_CASE(FCVT_W_RV64)
16094 NODE_NAME_CASE(FCVT_WU_RV64)
16095 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
16096 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
16097 NODE_NAME_CASE(FP_ROUND_BF16)
16098 NODE_NAME_CASE(FP_EXTEND_BF16)
16099 NODE_NAME_CASE(FROUND)
16100 NODE_NAME_CASE(FPCLASS)
16101 NODE_NAME_CASE(FMAX)
16102 NODE_NAME_CASE(FMIN)
16103 NODE_NAME_CASE(READ_CYCLE_WIDE)
16104 NODE_NAME_CASE(BREV8)
16105 NODE_NAME_CASE(ORC_B)
16106 NODE_NAME_CASE(ZIP)
16107 NODE_NAME_CASE(UNZIP)
16108 NODE_NAME_CASE(CLMUL)
16109 NODE_NAME_CASE(CLMULH)
16110 NODE_NAME_CASE(CLMULR)
16111 NODE_NAME_CASE(SHA256SIG0)
16112 NODE_NAME_CASE(SHA256SIG1)
16113 NODE_NAME_CASE(SHA256SUM0)
16114 NODE_NAME_CASE(SHA256SUM1)
16115 NODE_NAME_CASE(SM4KS)
16116 NODE_NAME_CASE(SM4ED)
16117 NODE_NAME_CASE(SM3P0)
16118 NODE_NAME_CASE(SM3P1)
16119 NODE_NAME_CASE(TH_LWD)
16120 NODE_NAME_CASE(TH_LWUD)
16121 NODE_NAME_CASE(TH_LDD)
16122 NODE_NAME_CASE(TH_SWD)
16123 NODE_NAME_CASE(TH_SDD)
16124 NODE_NAME_CASE(VMV_V_V_VL)
16125 NODE_NAME_CASE(VMV_V_X_VL)
16126 NODE_NAME_CASE(VFMV_V_F_VL)
16127 NODE_NAME_CASE(VMV_X_S)
16128 NODE_NAME_CASE(VMV_S_X_VL)
16129 NODE_NAME_CASE(VFMV_S_F_VL)
16130 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
16131 NODE_NAME_CASE(READ_VLENB)
16132 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
16133 NODE_NAME_CASE(VSLIDEUP_VL)
16134 NODE_NAME_CASE(VSLIDE1UP_VL)
16135 NODE_NAME_CASE(VSLIDEDOWN_VL)
16136 NODE_NAME_CASE(VSLIDE1DOWN_VL)
16137 NODE_NAME_CASE(VFSLIDE1UP_VL)
16138 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
16139 NODE_NAME_CASE(VID_VL)
16140 NODE_NAME_CASE(VFNCVT_ROD_VL)
16141 NODE_NAME_CASE(VECREDUCE_ADD_VL)
16142 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
16143 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
16144 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
16145 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
16146 NODE_NAME_CASE(VECREDUCE_AND_VL)
16147 NODE_NAME_CASE(VECREDUCE_OR_VL)
16148 NODE_NAME_CASE(VECREDUCE_XOR_VL)
16149 NODE_NAME_CASE(VECREDUCE_FADD_VL)
16150 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
16151 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
16152 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
16153 NODE_NAME_CASE(ADD_VL)
16154 NODE_NAME_CASE(AND_VL)
16155 NODE_NAME_CASE(MUL_VL)
16156 NODE_NAME_CASE(OR_VL)
16157 NODE_NAME_CASE(SDIV_VL)
16158 NODE_NAME_CASE(SHL_VL)
16159 NODE_NAME_CASE(SREM_VL)
16160 NODE_NAME_CASE(SRA_VL)
16161 NODE_NAME_CASE(SRL_VL)
16162 NODE_NAME_CASE(SUB_VL)
16163 NODE_NAME_CASE(UDIV_VL)
16164 NODE_NAME_CASE(UREM_VL)
16165 NODE_NAME_CASE(XOR_VL)
16166 NODE_NAME_CASE(SADDSAT_VL)
16167 NODE_NAME_CASE(UADDSAT_VL)
16168 NODE_NAME_CASE(SSUBSAT_VL)
16169 NODE_NAME_CASE(USUBSAT_VL)
16170 NODE_NAME_CASE(FADD_VL)
16171 NODE_NAME_CASE(FSUB_VL)
16172 NODE_NAME_CASE(FMUL_VL)
16173 NODE_NAME_CASE(FDIV_VL)
16174 NODE_NAME_CASE(FNEG_VL)
16175 NODE_NAME_CASE(FABS_VL)
16176 NODE_NAME_CASE(FSQRT_VL)
16177 NODE_NAME_CASE(FCLASS_VL)
16178 NODE_NAME_CASE(VFMADD_VL)
16179 NODE_NAME_CASE(VFNMADD_VL)
16180 NODE_NAME_CASE(VFMSUB_VL)
16181 NODE_NAME_CASE(VFNMSUB_VL)
16182 NODE_NAME_CASE(VFWMADD_VL)
16183 NODE_NAME_CASE(VFWNMADD_VL)
16184 NODE_NAME_CASE(VFWMSUB_VL)
16185 NODE_NAME_CASE(VFWNMSUB_VL)
16186 NODE_NAME_CASE(FCOPYSIGN_VL)
16187 NODE_NAME_CASE(SMIN_VL)
16188 NODE_NAME_CASE(SMAX_VL)
16189 NODE_NAME_CASE(UMIN_VL)
16190 NODE_NAME_CASE(UMAX_VL)
16191 NODE_NAME_CASE(BITREVERSE_VL)
16192 NODE_NAME_CASE(BSWAP_VL)
16193 NODE_NAME_CASE(CTLZ_VL)
16194 NODE_NAME_CASE(CTTZ_VL)
16195 NODE_NAME_CASE(CTPOP_VL)
16196 NODE_NAME_CASE(FMINNUM_VL)
16197 NODE_NAME_CASE(FMAXNUM_VL)
16198 NODE_NAME_CASE(MULHS_VL)
16199 NODE_NAME_CASE(MULHU_VL)
16200 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
16201 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
16202 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
16203 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
16204 NODE_NAME_CASE(VFCVT_X_F_VL)
16205 NODE_NAME_CASE(VFCVT_XU_F_VL)
16206 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
16207 NODE_NAME_CASE(SINT_TO_FP_VL)
16208 NODE_NAME_CASE(UINT_TO_FP_VL)
16209 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
16210 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
16211 NODE_NAME_CASE(FP_EXTEND_VL)
16212 NODE_NAME_CASE(FP_ROUND_VL)
16213 NODE_NAME_CASE(STRICT_FADD_VL)
16214 NODE_NAME_CASE(STRICT_FSUB_VL)
16215 NODE_NAME_CASE(STRICT_FMUL_VL)
16216 NODE_NAME_CASE(STRICT_FDIV_VL)
16217 NODE_NAME_CASE(STRICT_FSQRT_VL)
16218 NODE_NAME_CASE(STRICT_VFMADD_VL)
16219 NODE_NAME_CASE(STRICT_VFNMADD_VL)
16220 NODE_NAME_CASE(STRICT_VFMSUB_VL)
16221 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
16222 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
16223 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
16224 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
16225 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
16226 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
16227 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
16228 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
16229 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
16230 NODE_NAME_CASE(STRICT_FSETCC_VL)
16231 NODE_NAME_CASE(STRICT_FSETCCS_VL)
16232 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
16233 NODE_NAME_CASE(VWMUL_VL)
16234 NODE_NAME_CASE(VWMULU_VL)
16235 NODE_NAME_CASE(VWMULSU_VL)
16236 NODE_NAME_CASE(VWADD_VL)
16237 NODE_NAME_CASE(VWADDU_VL)
16238 NODE_NAME_CASE(VWSUB_VL)
16239 NODE_NAME_CASE(VWSUBU_VL)
16240 NODE_NAME_CASE(VWADD_W_VL)
16241 NODE_NAME_CASE(VWADDU_W_VL)
16242 NODE_NAME_CASE(VWSUB_W_VL)
16243 NODE_NAME_CASE(VWSUBU_W_VL)
16244 NODE_NAME_CASE(VFWMUL_VL)
16245 NODE_NAME_CASE(VFWADD_VL)
16246 NODE_NAME_CASE(VFWSUB_VL)
16247 NODE_NAME_CASE(VFWADD_W_VL)
16248 NODE_NAME_CASE(VFWSUB_W_VL)
16249 NODE_NAME_CASE(VWMACC_VL)
16250 NODE_NAME_CASE(VWMACCU_VL)
16251 NODE_NAME_CASE(VWMACCSU_VL)
16252 NODE_NAME_CASE(VNSRL_VL)
16253 NODE_NAME_CASE(SETCC_VL)
16254 NODE_NAME_CASE(VSELECT_VL)
16255 NODE_NAME_CASE(VP_MERGE_VL)
16256 NODE_NAME_CASE(VMAND_VL)
16257 NODE_NAME_CASE(VMOR_VL)
16258 NODE_NAME_CASE(VMXOR_VL)
16259 NODE_NAME_CASE(VMCLR_VL)
16260 NODE_NAME_CASE(VMSET_VL)
16261 NODE_NAME_CASE(VRGATHER_VX_VL)
16262 NODE_NAME_CASE(VRGATHER_VV_VL)
16263 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
16264 NODE_NAME_CASE(VSEXT_VL)
16265 NODE_NAME_CASE(VZEXT_VL)
16266 NODE_NAME_CASE(VCPOP_VL)
16267 NODE_NAME_CASE(VFIRST_VL)
16268 NODE_NAME_CASE(READ_CSR)
16269 NODE_NAME_CASE(WRITE_CSR)
16270 NODE_NAME_CASE(SWAP_CSR)
16271 NODE_NAME_CASE(CZERO_EQZ)
16272 NODE_NAME_CASE(CZERO_NEZ)
16273 }
16274 // clang-format on
16275 return nullptr;
16276#undef NODE_NAME_CASE
16277}
16278
16279/// getConstraintType - Given a constraint letter, return the type of
16280/// constraint it is for this target.
16281 RISCVTargetLowering::ConstraintType
16282 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
16283 if (Constraint.size() == 1) {
16284 switch (Constraint[0]) {
16285 default:
16286 break;
16287 case 'f':
16288 return C_RegisterClass;
16289 case 'I':
16290 case 'J':
16291 case 'K':
16292 return C_Immediate;
16293 case 'A':
16294 return C_Memory;
16295 case 'S': // A symbolic address
16296 return C_Other;
16297 }
16298 } else {
16299 if (Constraint == "vr" || Constraint == "vm")
16300 return C_RegisterClass;
16301 }
16302 return TargetLowering::getConstraintType(Constraint);
16303}
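// Illustrative inline-asm uses (hypothetical, not from the upstream code) of
// the single-letter constraints classified above:
//   int v = 42;
//   asm volatile ("addi %0, %0, %1" : "+r"(v) : "I"(16));  // 'I': 12-bit simm
//   asm volatile ("slli %0, %0, %1" : "+r"(v) : "K"(3));   // 'K': 5-bit uimm
//   float x; asm ("fmv.w.x %0, zero" : "=f"(x));  // 'f': FP reg, hard-float only
// 'A' describes a memory operand whose address is held in a register (as used
// by LR/SC and AMO patterns) and 'S' a symbolic address such as a global.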
16304
16305std::pair<unsigned, const TargetRegisterClass *>
16306 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16307 StringRef Constraint,
16308 MVT VT) const {
16309 // First, see if this is a constraint that directly corresponds to a RISC-V
16310 // register class.
16311 if (Constraint.size() == 1) {
16312 switch (Constraint[0]) {
16313 case 'r':
16314 // TODO: Support fixed vectors up to XLen for P extension?
16315 if (VT.isVector())
16316 break;
16317 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
16318 case 'f':
16319 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
16320 return std::make_pair(0U, &RISCV::FPR16RegClass);
16321 if (Subtarget.hasStdExtF() && VT == MVT::f32)
16322 return std::make_pair(0U, &RISCV::FPR32RegClass);
16323 if (Subtarget.hasStdExtD() && VT == MVT::f64)
16324 return std::make_pair(0U, &RISCV::FPR64RegClass);
16325 break;
16326 default:
16327 break;
16328 }
16329 } else if (Constraint == "vr") {
16330 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
16331 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
16332 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
16333 return std::make_pair(0U, RC);
16334 }
16335 } else if (Constraint == "vm") {
16336 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
16337 return std::make_pair(0U, &RISCV::VMV0RegClass);
16338 }
16339
16340 // Clang will correctly decode the usage of register name aliases into their
16341 // official names. However, other frontends like `rustc` do not. This allows
16342 // users of these frontends to use the ABI names for registers in LLVM-style
16343 // register constraints.
16344 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
16345 .Case("{zero}", RISCV::X0)
16346 .Case("{ra}", RISCV::X1)
16347 .Case("{sp}", RISCV::X2)
16348 .Case("{gp}", RISCV::X3)
16349 .Case("{tp}", RISCV::X4)
16350 .Case("{t0}", RISCV::X5)
16351 .Case("{t1}", RISCV::X6)
16352 .Case("{t2}", RISCV::X7)
16353 .Cases("{s0}", "{fp}", RISCV::X8)
16354 .Case("{s1}", RISCV::X9)
16355 .Case("{a0}", RISCV::X10)
16356 .Case("{a1}", RISCV::X11)
16357 .Case("{a2}", RISCV::X12)
16358 .Case("{a3}", RISCV::X13)
16359 .Case("{a4}", RISCV::X14)
16360 .Case("{a5}", RISCV::X15)
16361 .Case("{a6}", RISCV::X16)
16362 .Case("{a7}", RISCV::X17)
16363 .Case("{s2}", RISCV::X18)
16364 .Case("{s3}", RISCV::X19)
16365 .Case("{s4}", RISCV::X20)
16366 .Case("{s5}", RISCV::X21)
16367 .Case("{s6}", RISCV::X22)
16368 .Case("{s7}", RISCV::X23)
16369 .Case("{s8}", RISCV::X24)
16370 .Case("{s9}", RISCV::X25)
16371 .Case("{s10}", RISCV::X26)
16372 .Case("{s11}", RISCV::X27)
16373 .Case("{t3}", RISCV::X28)
16374 .Case("{t4}", RISCV::X29)
16375 .Case("{t5}", RISCV::X30)
16376 .Case("{t6}", RISCV::X31)
16377 .Default(RISCV::NoRegister);
16378 if (XRegFromAlias != RISCV::NoRegister)
16379 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
16380
16381 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
16382 // TableGen record rather than the AsmName to choose registers for InlineAsm
16383 // constraints, plus we want to match those names to the widest floating point
16384 // register type available, manually select floating point registers here.
16385 //
16386 // The second case is the ABI name of the register, so that frontends can also
16387 // use the ABI names in register constraint lists.
16388 if (Subtarget.hasStdExtF()) {
16389 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
16390 .Cases("{f0}", "{ft0}", RISCV::F0_F)
16391 .Cases("{f1}", "{ft1}", RISCV::F1_F)
16392 .Cases("{f2}", "{ft2}", RISCV::F2_F)
16393 .Cases("{f3}", "{ft3}", RISCV::F3_F)
16394 .Cases("{f4}", "{ft4}", RISCV::F4_F)
16395 .Cases("{f5}", "{ft5}", RISCV::F5_F)
16396 .Cases("{f6}", "{ft6}", RISCV::F6_F)
16397 .Cases("{f7}", "{ft7}", RISCV::F7_F)
16398 .Cases("{f8}", "{fs0}", RISCV::F8_F)
16399 .Cases("{f9}", "{fs1}", RISCV::F9_F)
16400 .Cases("{f10}", "{fa0}", RISCV::F10_F)
16401 .Cases("{f11}", "{fa1}", RISCV::F11_F)
16402 .Cases("{f12}", "{fa2}", RISCV::F12_F)
16403 .Cases("{f13}", "{fa3}", RISCV::F13_F)
16404 .Cases("{f14}", "{fa4}", RISCV::F14_F)
16405 .Cases("{f15}", "{fa5}", RISCV::F15_F)
16406 .Cases("{f16}", "{fa6}", RISCV::F16_F)
16407 .Cases("{f17}", "{fa7}", RISCV::F17_F)
16408 .Cases("{f18}", "{fs2}", RISCV::F18_F)
16409 .Cases("{f19}", "{fs3}", RISCV::F19_F)
16410 .Cases("{f20}", "{fs4}", RISCV::F20_F)
16411 .Cases("{f21}", "{fs5}", RISCV::F21_F)
16412 .Cases("{f22}", "{fs6}", RISCV::F22_F)
16413 .Cases("{f23}", "{fs7}", RISCV::F23_F)
16414 .Cases("{f24}", "{fs8}", RISCV::F24_F)
16415 .Cases("{f25}", "{fs9}", RISCV::F25_F)
16416 .Cases("{f26}", "{fs10}", RISCV::F26_F)
16417 .Cases("{f27}", "{fs11}", RISCV::F27_F)
16418 .Cases("{f28}", "{ft8}", RISCV::F28_F)
16419 .Cases("{f29}", "{ft9}", RISCV::F29_F)
16420 .Cases("{f30}", "{ft10}", RISCV::F30_F)
16421 .Cases("{f31}", "{ft11}", RISCV::F31_F)
16422 .Default(RISCV::NoRegister);
16423 if (FReg != RISCV::NoRegister) {
16424 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
16425 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
16426 unsigned RegNo = FReg - RISCV::F0_F;
16427 unsigned DReg = RISCV::F0_D + RegNo;
16428 return std::make_pair(DReg, &RISCV::FPR64RegClass);
16429 }
16430 if (VT == MVT::f32 || VT == MVT::Other)
16431 return std::make_pair(FReg, &RISCV::FPR32RegClass);
16432 if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) {
16433 unsigned RegNo = FReg - RISCV::F0_F;
16434 unsigned HReg = RISCV::F0_H + RegNo;
16435 return std::make_pair(HReg, &RISCV::FPR16RegClass);
16436 }
16437 }
16438 }
16439
16440 if (Subtarget.hasVInstructions()) {
16441 Register VReg = StringSwitch<Register>(Constraint.lower())
16442 .Case("{v0}", RISCV::V0)
16443 .Case("{v1}", RISCV::V1)
16444 .Case("{v2}", RISCV::V2)
16445 .Case("{v3}", RISCV::V3)
16446 .Case("{v4}", RISCV::V4)
16447 .Case("{v5}", RISCV::V5)
16448 .Case("{v6}", RISCV::V6)
16449 .Case("{v7}", RISCV::V7)
16450 .Case("{v8}", RISCV::V8)
16451 .Case("{v9}", RISCV::V9)
16452 .Case("{v10}", RISCV::V10)
16453 .Case("{v11}", RISCV::V11)
16454 .Case("{v12}", RISCV::V12)
16455 .Case("{v13}", RISCV::V13)
16456 .Case("{v14}", RISCV::V14)
16457 .Case("{v15}", RISCV::V15)
16458 .Case("{v16}", RISCV::V16)
16459 .Case("{v17}", RISCV::V17)
16460 .Case("{v18}", RISCV::V18)
16461 .Case("{v19}", RISCV::V19)
16462 .Case("{v20}", RISCV::V20)
16463 .Case("{v21}", RISCV::V21)
16464 .Case("{v22}", RISCV::V22)
16465 .Case("{v23}", RISCV::V23)
16466 .Case("{v24}", RISCV::V24)
16467 .Case("{v25}", RISCV::V25)
16468 .Case("{v26}", RISCV::V26)
16469 .Case("{v27}", RISCV::V27)
16470 .Case("{v28}", RISCV::V28)
16471 .Case("{v29}", RISCV::V29)
16472 .Case("{v30}", RISCV::V30)
16473 .Case("{v31}", RISCV::V31)
16474 .Default(RISCV::NoRegister);
16475 if (VReg != RISCV::NoRegister) {
16476 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
16477 return std::make_pair(VReg, &RISCV::VMRegClass);
16478 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
16479 return std::make_pair(VReg, &RISCV::VRRegClass);
16480 for (const auto *RC :
16481 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
16482 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
16483 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
16484 return std::make_pair(VReg, RC);
16485 }
16486 }
16487 }
16488 }
16489
16490 std::pair<Register, const TargetRegisterClass *> Res =
16491 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16492
16493 // If we picked one of the Zfinx register classes, remap it to the GPR class.
16494 // FIXME: When Zfinx is supported in CodeGen this will need to take the
16495 // Subtarget into account.
16496 if (Res.second == &RISCV::GPRF16RegClass ||
16497 Res.second == &RISCV::GPRF32RegClass ||
16498 Res.second == &RISCV::GPRPF64RegClass)
16499 return std::make_pair(Res.first, &RISCV::GPRRegClass);
16500
16501 return Res;
16502}
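// Illustrative use (hypothetical, not from the upstream code) of the ABI-name
// aliases resolved above. A frontend may emit LLVM-style explicit register
// constraints such as
//   %ret = call i64 asm sideeffect "ecall", "={a0},{a7},{a0}"(i64 %nr, i64 %arg)
// and "{a0}"/"{a7}" are mapped to X10/X17 by the StringSwitch, while "{fa0}"
// is mapped to the widest floating-point register the subtarget and value
// type allow (F10_D, F10_F or F10_H).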
16503
16504unsigned
16505 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
16506 // Currently only support length 1 constraints.
16507 if (ConstraintCode.size() == 1) {
16508 switch (ConstraintCode[0]) {
16509 case 'A':
16510 return InlineAsm::Constraint_A;
16511 default:
16512 break;
16513 }
16514 }
16515
16516 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
16517}
16518
16519 void RISCVTargetLowering::LowerAsmOperandForConstraint(
16520 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
16521 SelectionDAG &DAG) const {
16522 // Currently only support length 1 constraints.
16523 if (Constraint.length() == 1) {
16524 switch (Constraint[0]) {
16525 case 'I':
16526 // Validate & create a 12-bit signed immediate operand.
16527 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
16528 uint64_t CVal = C->getSExtValue();
16529 if (isInt<12>(CVal))
16530 Ops.push_back(
16531 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
16532 }
16533 return;
16534 case 'J':
16535 // Validate & create an integer zero operand.
16536 if (isNullConstant(Op))
16537 Ops.push_back(
16538 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
16539 return;
16540 case 'K':
16541 // Validate & create a 5-bit unsigned immediate operand.
16542 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
16543 uint64_t CVal = C->getZExtValue();
16544 if (isUInt<5>(CVal))
16545 Ops.push_back(
16546 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
16547 }
16548 return;
16549 case 'S':
16550 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
16551 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
16552 GA->getValueType(0)));
16553 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
16554 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
16555 BA->getValueType(0)));
16556 }
16557 return;
16558 default:
16559 break;
16560 }
16561 }
16562 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16563}
16564
16565 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
16566 Instruction *Inst,
16567 AtomicOrdering Ord) const {
16568 if (Subtarget.hasStdExtZtso()) {
16569 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
16570 return Builder.CreateFence(Ord);
16571 return nullptr;
16572 }
16573
16574 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
16575 return Builder.CreateFence(Ord);
16576 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
16577 return Builder.CreateFence(AtomicOrdering::Release);
16578 return nullptr;
16579}
16580
16581 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
16582 Instruction *Inst,
16583 AtomicOrdering Ord) const {
16584 if (Subtarget.hasStdExtZtso())
16585 return nullptr;
16586
16587 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
16588 return Builder.CreateFence(AtomicOrdering::Acquire);
16589 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
16590 Ord == AtomicOrdering::SequentiallyConsistent)
16591 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
16592 return nullptr;
16593}
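// Illustrative mapping (approximate, not from the upstream code) produced by
// the two fence hooks above under the default weak memory ordering:
//   load atomic i32 seq_cst   ->  fence rw,rw ; lw ; fence r,rw
//   store atomic i32 seq_cst  ->  fence rw,w  ; sw   (plus a trailing
//                                  fence rw,rw if enableSeqCstTrailingFence())
// With the Ztso extension only the seq_cst load keeps its leading fence here;
// the other fences are omitted because TSO already provides the ordering.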
16594
16595 TargetLowering::AtomicExpansionKind
16596 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
16597 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
16598 // point operations can't be used in an lr/sc sequence without breaking the
16599 // forward-progress guarantee.
16600 if (AI->isFloatingPointOperation() ||
16601 AI->getOperation() == AtomicRMWInst::UIncWrap ||
16602 AI->getOperation() == AtomicRMWInst::UDecWrap)
16603 return AtomicExpansionKind::CmpXChg;
16604
16605 // Don't expand forced atomics, we want to have __sync libcalls instead.
16606 if (Subtarget.hasForcedAtomics())
16607 return AtomicExpansionKind::None;
16608
16609 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
16610 if (Size == 8 || Size == 16)
16611 return AtomicExpansionKind::MaskedIntrinsic;
16612 return AtomicExpansionKind::None;
16613}
16614
16615static Intrinsic::ID
16616 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
16617 if (XLen == 32) {
16618 switch (BinOp) {
16619 default:
16620 llvm_unreachable("Unexpected AtomicRMW BinOp");
16621 case AtomicRMWInst::Xchg:
16622 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
16623 case AtomicRMWInst::Add:
16624 return Intrinsic::riscv_masked_atomicrmw_add_i32;
16625 case AtomicRMWInst::Sub:
16626 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
16627 case AtomicRMWInst::Nand:
16628 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
16629 case AtomicRMWInst::Max:
16630 return Intrinsic::riscv_masked_atomicrmw_max_i32;
16631 case AtomicRMWInst::Min:
16632 return Intrinsic::riscv_masked_atomicrmw_min_i32;
16633 case AtomicRMWInst::UMax:
16634 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
16635 case AtomicRMWInst::UMin:
16636 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
16637 }
16638 }
16639
16640 if (XLen == 64) {
16641 switch (BinOp) {
16642 default:
16643 llvm_unreachable("Unexpected AtomicRMW BinOp");
16644 case AtomicRMWInst::Xchg:
16645 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
16646 case AtomicRMWInst::Add:
16647 return Intrinsic::riscv_masked_atomicrmw_add_i64;
16648 case AtomicRMWInst::Sub:
16649 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
16650 case AtomicRMWInst::Nand:
16651 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
16652 case AtomicRMWInst::Max:
16653 return Intrinsic::riscv_masked_atomicrmw_max_i64;
16654 case AtomicRMWInst::Min:
16655 return Intrinsic::riscv_masked_atomicrmw_min_i64;
16656 case AtomicRMWInst::UMax:
16657 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
16658 case AtomicRMWInst::UMin:
16659 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
16660 }
16661 }
16662
16663 llvm_unreachable("Unexpected XLen\n");
16664}
16665
16666 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
16667 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
16668 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
16669 unsigned XLen = Subtarget.getXLen();
16670 Value *Ordering =
16671 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
16672 Type *Tys[] = {AlignedAddr->getType()};
16673 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
16674 AI->getModule(),
16675 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
16676
16677 if (XLen == 64) {
16678 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
16679 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
16680 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
16681 }
16682
16683 Value *Result;
16684
16685 // Must pass the shift amount needed to sign extend the loaded value prior
16686 // to performing a signed comparison for min/max. ShiftAmt is the number of
16687 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
16688 // is the number of bits to left+right shift the value in order to
16689 // sign-extend.
16690 if (AI->getOperation() == AtomicRMWInst::Min ||
16691 AI->getOperation() == AtomicRMWInst::Max) {
16692 const DataLayout &DL = AI->getModule()->getDataLayout();
16693 unsigned ValWidth =
16694 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
16695 Value *SextShamt =
16696 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
16697 Result = Builder.CreateCall(LrwOpScwLoop,
16698 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
16699 } else {
16700 Result =
16701 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
16702 }
16703
16704 if (XLen == 64)
16705 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
16706 return Result;
16707}
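// Illustrative expansion (approximate, not from the upstream code): for a
// sub-word operation such as
//   %old = atomicrmw add ptr %p, i8 1 monotonic
// AtomicExpandPass supplies the containing aligned word, a byte mask and the
// shift amount, and the hook above emits roughly
//   %res = call i32 @llvm.riscv.masked.atomicrmw.add.i32.p0(
//              ptr %aligned, i32 %incr, i32 %mask, i32 %ordering)
// which is later selected into an LR.W/SC.W retry loop that modifies only the
// masked byte. On RV64 the i32 operands are first sign-extended to i64 and
// the result truncated back, as done above.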
16708
16709 TargetLowering::AtomicExpansionKind
16710 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
16711 AtomicCmpXchgInst *CI) const {
16712 // Don't expand forced atomics, we want to have __sync libcalls instead.
16713 if (Subtarget.hasForcedAtomics())
16714 return AtomicExpansionKind::None;
16715
16716 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
16717 if (Size == 8 || Size == 16)
16718 return AtomicExpansionKind::MaskedIntrinsic;
16719 return AtomicExpansionKind::None;
16720}
16721
16722 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
16723 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
16724 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
16725 unsigned XLen = Subtarget.getXLen();
16726 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
16727 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
16728 if (XLen == 64) {
16729 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
16730 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
16731 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
16732 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
16733 }
16734 Type *Tys[] = {AlignedAddr->getType()};
16735 Function *MaskedCmpXchg =
16736 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
16737 Value *Result = Builder.CreateCall(
16738 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
16739 if (XLen == 64)
16740 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
16741 return Result;
16742}
16743
16744 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
16745 EVT DataVT) const {
16746 return false;
16747}
16748
16749 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
16750 EVT VT) const {
16751 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
16752 return false;
16753
16754 switch (FPVT.getSimpleVT().SimpleTy) {
16755 case MVT::f16:
16756 return Subtarget.hasStdExtZfhOrZfhmin();
16757 case MVT::f32:
16758 return Subtarget.hasStdExtF();
16759 case MVT::f64:
16760 return Subtarget.hasStdExtD();
16761 default:
16762 return false;
16763 }
16764}
16765
16766 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
16767 // If we are using the small code model, we can reduce size of jump table
16768 // entry to 4 bytes.
16769 if (Subtarget.is64Bit() && !isPositionIndependent() &&
16770 getTargetMachine().getCodeModel() == CodeModel::Small) {
16771 return MachineJumpTableInfo::EK_Custom32;
16772 }
16773 return TargetLowering::getJumpTableEncoding();
16774}
16775
16776 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
16777 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
16778 unsigned uid, MCContext &Ctx) const {
16779 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
16780 getTargetMachine().getCodeModel() == CodeModel::Small);
16781 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
16782}
16783
16784 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
16785 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
16786 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
16787 // a power of two as well.
16788 // FIXME: This doesn't work for zve32, but that's already broken
16789 // elsewhere for the same reason.
16790 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
16791 static_assert(RISCV::RVVBitsPerBlock == 64,
16792 "RVVBitsPerBlock changed, audit needed");
16793 return true;
16794}
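// For illustration (not from the upstream code): vscale = VLEN / 64 here, so
// VLEN=128 gives vscale=2 and VLEN=512 gives vscale=8; because VLEN is a
// power of two >= 64, vscale itself is always a power of two, which lets
// vscale-scaled offsets be lowered with shifts of the vlenb CSR value instead
// of multiplies.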
16795
16796 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
16797 SDValue &Offset,
16798 ISD::MemIndexedMode &AM,
16799 bool &IsInc,
16800 SelectionDAG &DAG) const {
16801 // Target does not support indexed loads.
16802 if (!Subtarget.hasVendorXTHeadMemIdx())
16803 return false;
16804
16805 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
16806 return false;
16807
16808 Base = Op->getOperand(0);
16809 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
16810 int64_t RHSC = RHS->getSExtValue();
16811 if (Op->getOpcode() == ISD::SUB)
16812 RHSC = -(uint64_t)RHSC;
16813
16814 // The constants that can be encoded in the THeadMemIdx instructions
16815 // are of the form (sign_extend(imm5) << imm2).
16816 bool isLegalIndexedOffset = false;
16817 for (unsigned i = 0; i < 4; i++)
16818 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
16819 isLegalIndexedOffset = true;
16820 break;
16821 }
16822
16823 if (!isLegalIndexedOffset)
16824 return false;
16825
16826 IsInc = (Op->getOpcode() == ISD::ADD);
16827 Offset = Op->getOperand(1);
16828 return true;
16829 }
16830
16831 return false;
16832}
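// For illustration (not from the upstream code): the legal XTHeadMemIdx
// offsets checked above have the form sign_extend(imm5) << imm2, i.e. any of
// -16..15 scaled by 1, 2, 4 or 8. An offset of 40 (5 << 3) can therefore form
// a pre/post-indexed access, while 37 cannot and keeps a separate add.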
16833
16834 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
16835 SDValue &Offset,
16836 ISD::MemIndexedMode &AM,
16837 SelectionDAG &DAG) const {
16838 EVT VT;
16839 SDValue Ptr;
16840 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16841 VT = LD->getMemoryVT();
16842 Ptr = LD->getBasePtr();
16843 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16844 VT = ST->getMemoryVT();
16845 Ptr = ST->getBasePtr();
16846 } else
16847 return false;
16848
16849 bool IsInc;
16850 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
16851 return false;
16852
16853 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
16854 return true;
16855}
16856
16857 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
16858 SDValue &Base,
16859 SDValue &Offset,
16860 ISD::MemIndexedMode &AM,
16861 SelectionDAG &DAG) const {
16862 EVT VT;
16863 SDValue Ptr;
16864 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16865 VT = LD->getMemoryVT();
16866 Ptr = LD->getBasePtr();
16867 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16868 VT = ST->getMemoryVT();
16869 Ptr = ST->getBasePtr();
16870 } else
16871 return false;
16872
16873 bool IsInc;
16874 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
16875 return false;
16876 // Post-indexing updates the base, so it's not a valid transform
16877 // if that's not the same as the load's pointer.
16878 if (Ptr != Base)
16879 return false;
16880
16881 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
16882 return true;
16883}
16884
16885 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16886 EVT VT) const {
16887 EVT SVT = VT.getScalarType();
16888
16889 if (!SVT.isSimple())
16890 return false;
16891
16892 switch (SVT.getSimpleVT().SimpleTy) {
16893 case MVT::f16:
16894 return VT.isVector() ? Subtarget.hasVInstructionsF16()
16895 : Subtarget.hasStdExtZfhOrZhinx();
16896 case MVT::f32:
16897 return Subtarget.hasStdExtFOrZfinx();
16898 case MVT::f64:
16899 return Subtarget.hasStdExtDOrZdinx();
16900 default:
16901 break;
16902 }
16903
16904 return false;
16905}
16906
16907 Register RISCVTargetLowering::getExceptionPointerRegister(
16908 const Constant *PersonalityFn) const {
16909 return RISCV::X10;
16910}
16911
16912 Register RISCVTargetLowering::getExceptionSelectorRegister(
16913 const Constant *PersonalityFn) const {
16914 return RISCV::X11;
16915}
16916
16917 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
16918 // Return false to suppress the unnecessary extensions if the LibCall
16919 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
16920 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
16921 Type.getSizeInBits() < Subtarget.getXLen()))
16922 return false;
16923
16924 return true;
16925}
16926
16927 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
16928 if (Subtarget.is64Bit() && Type == MVT::i32)
16929 return true;
16930
16931 return IsSigned;
16932}
16933
16934 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16935 SDValue C) const {
16936 // Check integral scalar types.
16937 const bool HasExtMOrZmmul =
16938 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
16939 if (!VT.isScalarInteger())
16940 return false;
16941
16942 // Omit the optimization if the sub target has the M extension and the data
16943 // size exceeds XLen.
16944 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
16945 return false;
16946
16947 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16948 // Break the MUL to a SLLI and an ADD/SUB.
16949 const APInt &Imm = ConstNode->getAPIntValue();
16950 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
16951 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
16952 return true;
16953
16954 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
16955 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
16956 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
16957 (Imm - 8).isPowerOf2()))
16958 return true;
16959
16960 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
16961 // a pair of LUI/ADDI.
16962 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
16963 ConstNode->hasOneUse()) {
16964 APInt ImmS = Imm.ashr(Imm.countr_zero());
16965 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
16966 (1 - ImmS).isPowerOf2())
16967 return true;
16968 }
16969 }
16970
16971 return false;
16972}
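// Worked examples (illustrative, not from the upstream code) of constants the
// hook above accepts:
//   x * 17   ->  (x << 4) + x            // Imm - 1 is a power of two
//   x * 15   ->  (x << 4) - x            // Imm + 1 is a power of two
//   x * 4100 ->  sh2add x, (x << 12)     // with Zba; Imm - 4 == 4096 and
//                                        // 4100 does not fit in simm12
// The hook only reports that decomposing beats materialising the constant for
// a MUL; the DAG combiner decides which form is actually emitted.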
16973
16974 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
16975 SDValue ConstNode) const {
16976 // Let the DAGCombiner decide for vectors.
16977 EVT VT = AddNode.getValueType();
16978 if (VT.isVector())
16979 return true;
16980
16981 // Let the DAGCombiner decide for larger types.
16982 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
16983 return true;
16984
16985 // It is worse if c1 is simm12 while c1*c2 is not.
16986 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
16987 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
16988 const APInt &C1 = C1Node->getAPIntValue();
16989 const APInt &C2 = C2Node->getAPIntValue();
16990 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
16991 return false;
16992
16993 // Default to true and let the DAGCombiner decide.
16994 return true;
16995}
16996
16997 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
16998 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
16999 unsigned *Fast) const {
17000 if (!VT.isVector()) {
17001 if (Fast)
17002 *Fast = Subtarget.enableUnalignedScalarMem();
17003 return Subtarget.enableUnalignedScalarMem();
17004 }
17005
17006 // All vector implementations must support element alignment
17007 EVT ElemVT = VT.getVectorElementType();
17008 if (Alignment >= ElemVT.getStoreSize()) {
17009 if (Fast)
17010 *Fast = 1;
17011 return true;
17012 }
17013
17014 // Note: We lower an unmasked unaligned vector access to an equally sized
17015 // e8 element type access. Given this, we effectively support all unmasked
17016 // misaligned accesses. TODO: Work through the codegen implications of
17017 // allowing such accesses to be formed, and considered fast.
17018 if (Fast)
17019 *Fast = Subtarget.enableUnalignedVectorMem();
17020 return Subtarget.enableUnalignedVectorMem();
17021}
17022
17023 bool RISCVTargetLowering::splitValueIntoRegisterParts(
17024 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
17025 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
17026 bool IsABIRegCopy = CC.has_value();
17027 EVT ValueVT = Val.getValueType();
17028 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
17029 PartVT == MVT::f32) {
17030 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
17031 // nan, and cast to f32.
17032 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
17033 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
17034 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
17035 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
17036 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
17037 Parts[0] = Val;
17038 return true;
17039 }
17040
17041 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
17042 LLVMContext &Context = *DAG.getContext();
17043 EVT ValueEltVT = ValueVT.getVectorElementType();
17044 EVT PartEltVT = PartVT.getVectorElementType();
17045 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
17046 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
17047 if (PartVTBitSize % ValueVTBitSize == 0) {
17048 assert(PartVTBitSize >= ValueVTBitSize);
17049 // If the element types are different, bitcast to the same element type of
17050 // PartVT first.
17051 // For example, to copy a <vscale x 1 x i8> value into
17052 // <vscale x 4 x i16>, we first widen <vscale x 1 x i8> to
17053 // <vscale x 8 x i8> with an insert_subvector, and then bitcast the
17054 // result to <vscale x 4 x i16>.
17055 if (ValueEltVT != PartEltVT) {
17056 if (PartVTBitSize > ValueVTBitSize) {
17057 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
17058 assert(Count != 0 && "The number of element should not be zero.");
17059 EVT SameEltTypeVT =
17060 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
17061 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
17062 DAG.getUNDEF(SameEltTypeVT), Val,
17063 DAG.getVectorIdxConstant(0, DL));
17064 }
17065 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
17066 } else {
17067 Val =
17068 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
17069 Val, DAG.getVectorIdxConstant(0, DL));
17070 }
17071 Parts[0] = Val;
17072 return true;
17073 }
17074 }
17075 return false;
17076}
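// For illustration (not from the upstream code): the f16/bf16 handling above
// is NaN-boxing. A half value of 1.0 (bits 0x3C00) passed in an f32 register
// becomes 0xFFFF0000 | 0x00003C00 = 0xFFFF3C00, a quiet f32 NaN, matching the
// psABI rule that narrower FP values occupy wider FP registers NaN-boxed;
// joinRegisterPartsIntoValue below simply truncates the low 16 bits back out.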
17077
17078 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
17079 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
17080 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
17081 bool IsABIRegCopy = CC.has_value();
17082 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
17083 PartVT == MVT::f32) {
17084 SDValue Val = Parts[0];
17085
17086 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
17087 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
17088 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
17089 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
17090 return Val;
17091 }
17092
17093 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
17094 LLVMContext &Context = *DAG.getContext();
17095 SDValue Val = Parts[0];
17096 EVT ValueEltVT = ValueVT.getVectorElementType();
17097 EVT PartEltVT = PartVT.getVectorElementType();
17098 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
17099 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
17100 if (PartVTBitSize % ValueVTBitSize == 0) {
17101 assert(PartVTBitSize >= ValueVTBitSize);
17102 EVT SameEltTypeVT = ValueVT;
17103 // If the element types are different, convert it to the same element type
17104 // of PartVT.
17105 // For example, to recover a <vscale x 1 x i8> value from
17106 // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
17107 // <vscale x 8 x i8>, and then extract the leading
17108 // <vscale x 1 x i8> subvector.
17109 if (ValueEltVT != PartEltVT) {
17110 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
17111 assert(Count != 0 && "The number of element should not be zero.");
17112 SameEltTypeVT =
17113 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
17114 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
17115 }
17116 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
17117 DAG.getVectorIdxConstant(0, DL));
17118 return Val;
17119 }
17120 }
17121 return SDValue();
17122}
17123
17124 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
17125 // When aggressively optimizing for code size, we prefer to use a div
17126 // instruction, as it is usually smaller than the alternative sequence.
17127 // TODO: Add vector division?
17128 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
17129 return OptSize && !VT.isVector();
17130}
17131
17132 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
17133 // Scalarizing zero_ext and sign_ext may stop them from matching a widening
17134 // instruction in some situations.
17135 unsigned Opc = N->getOpcode();
17136 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
17137 return false;
17138 return true;
17139}
17140
17141static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
17142 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
17143 Function *ThreadPointerFunc =
17144 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
17145 return IRB.CreatePointerCast(
17146 IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
17147 IRB.CreateCall(ThreadPointerFunc), Offset),
17148 IRB.getInt8PtrTy()->getPointerTo(0));
17149}
17150
17151 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
17152 // Fuchsia provides a fixed TLS slot for the stack cookie.
17153 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
17154 if (Subtarget.isTargetFuchsia())
17155 return useTpOffset(IRB, -0x10);
17156
17157 return TargetLowering::getIRStackGuard(IRB);
17158}
17159
17160 bool RISCVTargetLowering::isLegalInterleavedAccessType(
17161 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
17162 const DataLayout &DL) const {
17163 EVT VT = getValueType(DL, VTy);
17164 // Don't lower vlseg/vsseg for vector types that can't be split.
17165 if (!isTypeLegal(VT))
17166 return false;
17167
17168 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
17169 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
17170 Alignment))
17171 return false;
17172
17173 MVT ContainerVT = VT.getSimpleVT();
17174
17175 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17176 if (!Subtarget.useRVVForFixedLengthVectors())
17177 return false;
17178 // Sometimes the interleaved access pass picks up splats as interleaves of
17179 // one element. Don't lower these.
17180 if (FVTy->getNumElements() < 2)
17181 return false;
17182
17183 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
17184 }
17185
17186 // Need to make sure that EMUL * NFIELDS ≤ 8
17187 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
17188 if (Fractional)
17189 return true;
17190 return Factor * LMUL <= 8;
17191}
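// For illustration (not from the upstream code): with RVVBitsPerBlock == 64,
// <vscale x 4 x i32> occupies LMUL=2, so a factor-4 interleaved access needs
// 4 * 2 = 8 vector registers and is accepted, while a factor-8 access of the
// same type would need 16 and is rejected; fractional-LMUL element groups
// always pass the check above.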
17192
17193 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
17194 Align Alignment) const {
17195 if (!Subtarget.hasVInstructions())
17196 return false;
17197
17198 // Only support fixed vectors if we know the minimum vector size.
17199 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
17200 return false;
17201
17202 EVT ScalarType = DataType.getScalarType();
17203 if (!isLegalElementTypeForRVV(ScalarType))
17204 return false;
17205
17206 if (!Subtarget.enableUnalignedVectorMem() &&
17207 Alignment < ScalarType.getStoreSize())
17208 return false;
17209
17210 return true;
17211}
17212
17213 static const Intrinsic::ID FixedVlsegIntrIds[] = {
17214 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
17215 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
17216 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
17217 Intrinsic::riscv_seg8_load};
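// The table is indexed by (Factor - 2), covering interleave factors 2 through 8.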
17218
17219/// Lower an interleaved load into a vlsegN intrinsic.
17220///
17221/// E.g. Lower an interleaved load (Factor = 2):
17222/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
17223/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
17224/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
17225///
17226/// Into:
17227/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
17228/// %ptr, i64 4)
17229/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
17230/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
17231 bool RISCVTargetLowering::lowerInterleavedLoad(
17232 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
17233 ArrayRef<unsigned> Indices, unsigned Factor) const {
17234 IRBuilder<> Builder(LI);
17235
17236 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
17237 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
17238 LI->getPointerAddressSpace(),
17239 LI->getModule()->getDataLayout()))
17240 return false;
17241
17242 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
17243
17244 Function *VlsegNFunc =
17245 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
17246 {VTy, LI->getPointerOperandType(), XLenTy});
17247
17248 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
17249
17250 CallInst *VlsegN =
17251 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
17252
17253 for (unsigned i = 0; i < Shuffles.size(); i++) {
17254 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
17255 Shuffles[i]->replaceAllUsesWith(SubVec);
17256 }
17257
17258 return true;
17259}
17260
17261 static const Intrinsic::ID FixedVssegIntrIds[] = {
17262 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
17263 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
17264 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
17265 Intrinsic::riscv_seg8_store};
17266
17267/// Lower an interleaved store into a vssegN intrinsic.
17268///
17269/// E.g. Lower an interleaved store (Factor = 3):
17270/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
17271/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
17272/// store <12 x i32> %i.vec, <12 x i32>* %ptr
17273///
17274/// Into:
17275/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
17276/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
17277/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
17278/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
17279/// %ptr, i32 4)
17280///
17281/// Note that the new shufflevectors will be removed and we'll only generate one
17282/// vsseg3 instruction in CodeGen.
17283 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
17284 ShuffleVectorInst *SVI,
17285 unsigned Factor) const {
17286 IRBuilder<> Builder(SI);
17287 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
17288 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
17289 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
17290 ShuffleVTy->getNumElements() / Factor);
17291 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
17292 SI->getPointerAddressSpace(),
17293 SI->getModule()->getDataLayout()))
17294 return false;
17295
17296 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
17297
17298 Function *VssegNFunc =
17299 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
17300 {VTy, SI->getPointerOperandType(), XLenTy});
17301
17302 auto Mask = SVI->getShuffleMask();
17303 SmallVector<Value *, 10> Ops;
17304
17305 for (unsigned i = 0; i < Factor; i++) {
17306 Value *Shuffle = Builder.CreateShuffleVector(
17307 SVI->getOperand(0), SVI->getOperand(1),
17308 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
17309 Ops.push_back(Shuffle);
17310 }
17311 // This VL should be OK (it should be executable in one vsseg instruction,
17312 // potentially under larger LMULs) because we checked that the fixed vector
17313 // type fits in isLegalInterleavedAccessType above.
17314 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
17315 Ops.append({SI->getPointerOperand(), VL});
17316
17317 Builder.CreateCall(VssegNFunc, Ops);
17318
17319 return true;
17320}
17321
17322 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
17323 LoadInst *LI) const {
17324 assert(LI->isSimple());
17325 IRBuilder<> Builder(LI);
17326
17327 // Only deinterleave2 supported at present.
17328 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
17329 return false;
17330
17331 unsigned Factor = 2;
17332
17333 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
17334 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
17335
17336 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
17337 LI->getPointerAddressSpace(),
17338 LI->getModule()->getDataLayout()))
17339 return false;
17340
17341 Function *VlsegNFunc;
17342 Value *VL;
17343 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
17344 SmallVector<Value *, 10> Ops;
17345
17346 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17347 VlsegNFunc = Intrinsic::getDeclaration(
17348 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
17349 {ResVTy, LI->getPointerOperandType(), XLenTy});
17350 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
17351 } else {
17352 static const Intrinsic::ID IntrIds[] = {
17353 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
17354 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
17355 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
17356 Intrinsic::riscv_vlseg8};
17357
17358 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
17359 {ResVTy, XLenTy});
17360 VL = Constant::getAllOnesValue(XLenTy);
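// An all-ones AVL requests VLMAX for the scalable-vector path.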
17361 Ops.append(Factor, PoisonValue::get(ResVTy));
17362 }
17363
17364 Ops.append({LI->getPointerOperand(), VL});
17365
17366 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
17367 DI->replaceAllUsesWith(Vlseg);
17368
17369 return true;
17370}
17371
17372 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
17373 StoreInst *SI) const {
17374 assert(SI->isSimple());
17375 IRBuilder<> Builder(SI);
17376
17377 // Only interleave2 supported at present.
17378 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
17379 return false;
17380
17381 unsigned Factor = 2;
17382
17383 VectorType *VTy = cast<VectorType>(II->getType());
17384 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
17385
17386 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
17387 SI->getPointerAddressSpace(),
17388 SI->getModule()->getDataLayout()))
17389 return false;
17390
17391 Function *VssegNFunc;
17392 Value *VL;
17393 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
17394
17395 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17396 VssegNFunc = Intrinsic::getDeclaration(
17397 SI->getModule(), FixedVssegIntrIds[Factor - 2],
17398 {InVTy, SI->getPointerOperandType(), XLenTy});
17399 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
17400 } else {
17401 static const Intrinsic::ID IntrIds[] = {
17402 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
17403 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
17404 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
17405 Intrinsic::riscv_vsseg8};
17406
17407 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
17408 {InVTy, XLenTy});
17409 VL = Constant::getAllOnesValue(XLenTy);
17410 }
17411
17412 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
17413 SI->getPointerOperand(), VL});
17414
17415 return true;
17416}
17417
17418 MachineInstr *
17419 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
17420 MachineBasicBlock::iterator &MBBI,
17421 const TargetInstrInfo *TII) const {
17422 assert(MBBI->isCall() && MBBI->getCFIType() &&
17423 "Invalid call instruction for a KCFI check");
17424 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
17425 MBBI->getOpcode()));
17426
17427 MachineOperand &Target = MBBI->getOperand(0);
17428 Target.setIsRenamable(false);
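// The KCFI_CHECK emitted below must read the same register that the call
// uses, so the operand is marked non-renamable.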
17429
17430 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
17431 .addReg(Target.getReg())
17432 .addImm(MBBI->getCFIType())
17433 .getInstr();
17434}
17435
17436#define GET_REGISTER_MATCHER
17437#include "RISCVGenAsmMatcher.inc"
17438
17439 Register
17440 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17441 const MachineFunction &MF) const {
17442 Register Reg = MatchRegisterAltName(RegName);
17443 if (Reg == RISCV::NoRegister)
17444 Reg = MatchRegisterName(RegName);
17445 if (Reg == RISCV::NoRegister)
17446 report_fatal_error(
17447 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
17448 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
17449 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
17450 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
17451 StringRef(RegName) + "\"."));
17452 return Reg;
17453}
17454
17455 MachineMemOperand::Flags
17456 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
17457 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
17458
17459 if (NontemporalInfo == nullptr)
17460 return MachineMemOperand::MONone;
17461
17462 // 1 is the default value and works as __RISCV_NTLH_ALL
17463 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
17464 // 3 -> __RISCV_NTLH_ALL_PRIVATE
17465 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
17466 // 5 -> __RISCV_NTLH_ALL
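// The domain (minus 2) is encoded below in the two target-specific memory
// operand flags MONontemporalBit0 and MONontemporalBit1.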
17467 int NontemporalLevel = 5;
17468 const MDNode *RISCVNontemporalInfo =
17469 I.getMetadata("riscv-nontemporal-domain");
17470 if (RISCVNontemporalInfo != nullptr)
17471 NontemporalLevel =
17472 cast<ConstantInt>(
17473 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
17474 ->getValue())
17475 ->getZExtValue();
17476
17477 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
17478 "RISC-V target doesn't support this non-temporal domain.");
17479
17480 NontemporalLevel -= 2;
17481 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
17482 if (NontemporalLevel & 0b1)
17483 Flags |= MONontemporalBit0;
17484 if (NontemporalLevel & 0b10)
17485 Flags |= MONontemporalBit1;
17486
17487 return Flags;
17488}
17489
17490 MachineMemOperand::Flags
17491 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
17492
17493 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
17494 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
17495 TargetFlags |= (NodeFlags & MONontemporalBit0);
17496 TargetFlags |= (NodeFlags & MONontemporalBit1);
17497
17498 return TargetFlags;
17499}
17500
17501 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
17502 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
17503 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
17504}
17505
17506 namespace llvm::RISCVVIntrinsicsTable {
17507
17508#define GET_RISCVVIntrinsicsTable_IMPL
17509#include "RISCVGenSearchableTables.inc"
17510
17511} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static unsigned MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static unsigned MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
assume Assume Builder
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define LLVM_DEBUG(X)
Definition Debug.h:101
#define NL
uint64_t Align
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
#define Check(C,...)
Definition Lint.cpp:168
const MCPhysReg ArgFPR32s[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:114
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SelectionDAG &DAG)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgVRs[]
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc, unsigned CVTFOpc)
static MachineBasicBlock * emitReadCycleWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM2s[]
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static std::optional< unsigned > getRoundModeIdx(const MachineInstr &MI)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue narrowIndex(SDValue N, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static MVT getMaskTypeFor(MVT VecVT)
Return the type of the mask type suitable for masking the provided vector type.
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISCV target specified VL op for a given SDNode.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, std::optional< unsigned > FirstMaskArgument, CCState &State, const RISCVTargetLowering &TLI)
static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Combine a binary operation to its equivalent VW or VW_W form.
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
#define PseudoVFCVT_RM_CASE(RMOpc, Opc)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned getRVVVPReductionOp(unsigned ISDOpcode)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static const MCPhysReg ArgVRM8s[]
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
#define OP_CASE(NODE)
static MachineBasicBlock * emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static const Intrinsic::ID FixedVssegIntrIds[]
static bool isNonZeroAVL(const MachineOperand &MO)
unsigned SEW
uint64_t TSFlags
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
bool isNegative() const
Definition APFloat.h:1276
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1174
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1166
Class for arbitrary precision integers.
Definition APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1485
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1358
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1457
APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1302
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:349
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:358
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1369
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1583
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:413
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1476
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1229
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1361
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1507
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:829
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Definition ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This class holds the attributes for a function, its return value, and its parameters.
Definition Attributes.h:435
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:112
bool test(unsigned Idx) const
Definition BitVector.h:461
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:78
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition Constants.h:176
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:145
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition DataLayout.h:406
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:33
unsigned size() const
Definition DenseMap.h:99
iterator begin()
Definition DenseMap.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:220
Implements a dense probed hash-table based set.
Definition DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:748
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:174
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:670
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:237
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:313
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:319
Argument * getArg(unsigned i) const
Definition Function.h:794
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1839
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2097
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:174
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition IRBuilder.h:560
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2336
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:502
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2595
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isSimple() const
Align getAlign() const
Return the alignment of the access that is being performed.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:388
Metadata node.
Definition Metadata.h:950
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1303
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineInstr - Allocate a new MachineInstr.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition Module.cpp:398
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZfhminOrZhinxOrZhinxmin() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasStdExtZfhOrZfhmin() const
unsigned getXLen() const
unsigned getELEN() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
unsigned getRealMaxVLen() const
bool hasStdExtZhinxOrZhinxmin() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasVInstructionsF32() const
bool hasStdExtFOrZfinx() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, bool &IsInc, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a strided load/store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
bool isTargetStrictFPOpcode() const
Test if this node has a target-specific opcode that may raise FP exceptions (in the <target>ISD names...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF value.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
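A minimal sketch of building an equality compare with this helper, assuming TLI (the target lowering), DAG, DL, LHS and RHS are already in scope:
   // Ask the target what type a SETCC of LHS's type produces, then build the node.
   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                     LHS.getValueType());
   SDValue Eq = DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETEQ);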
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
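For illustration, a sketch of the expansion this helper performs, assuming DAG, DL, Val and VT are in scope (equivalent to DAG.getNOT(DL, Val, VT)):
   // Bitwise NOT as XOR with an all-ones constant of the same type.
   SDValue NotVal = DAG.getNode(ISD::XOR, DL, VT, Val,
                                DAG.getAllOnesConstant(DL, VT));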
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
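A small sketch combining getConstant with getNode, assuming DAG, DL and an SDValue X are in scope:
   // Build (add X, 1); the constant is created with X's value type.
   EVT VT = X.getValueType();
   SDValue One = DAG.getConstant(1, DL, VT);
   SDValue XPlusOne = DAG.getNode(ISD::ADD, DL, VT, X, One);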
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
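A sketch, assuming DAG, DL and a scalar SDValue Scalar are in scope; the fixed vector type chosen here is only for illustration:
   // Replicate Scalar into every lane of a 4 x i32 BUILD_VECTOR.
   SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, DL, Scalar);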
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:179
size_t size() const
Definition SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:36
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:137
std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
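A hypothetical usage sketch (the constraint strings and return values below are made up for illustration; Constraint is an assumed StringRef):
   unsigned Kind = StringSwitch<unsigned>(Constraint)
                       .Case("vr", 1)
                       .Case("vm", 2)
                       .Default(0);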
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:81
static constexpr TypeSize Fixed(ScalarTy ExactSize)
Definition TypeSize.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:250
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:171
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:129
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:229
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:253
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition Type.h:378
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition Type.cpp:816
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:349
A Use represents the edge between a Value definition and its users.
Definition Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:72
Value * getOperand(unsigned i) const
Definition User.h:169
LLVM Value Representation.
Definition Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:535
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1069
Base class of all SIMD vector types.
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:238
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:163
self_iterator getIterator()
Definition ilist_node.h:82
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:750
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:714
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:239
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:780
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:199
@ GlobalAddress
Definition ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:787
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:543
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:688
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:255
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:229
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:620
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:722
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition ISDOpcodes.h:586
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:646
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:727
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:211
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:627
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:323
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:876
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:573
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:777
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:742
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:795
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:674
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:591
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:449
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:429
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:442
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:833
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:400
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:636
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:612
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:866
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:423
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:852
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:783
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:763
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:340
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition ISDOpcodes.h:580
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:515
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
static bool hasRoundModeOp(uint64_t TSFlags)
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static bool hasVecPolicyOp(uint64_t TSFlags)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
int getIntMatCost(const APInt &Val, unsigned Size, const FeatureBitset &ActiveFeatures, bool CompressionCost)
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional< unsigned > FirstMaskArgument)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional< unsigned > FirstMaskArgument)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:57
initializer< Ty > init(const Ty &Val)
CodeModel::Model getCodeModel()
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:440
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:414
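A worked instance, assuming plain integer operands:
   // 96 bits do not fit in one 64-bit register, so two are needed.
   unsigned NumRegs = divideCeil(96, 64); // == 2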
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:151
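Since isInt is constexpr, its behaviour can be shown directly; a signed 12-bit field (the width of a RISC-V I-type immediate) spans [-2048, 2047]:
   static_assert(isInt<12>(2047) && !isInt<12>(2048), "");
   static_assert(isInt<12>(-2048) && !isInt<12>(-2049), "");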
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
FunctionAddr Count
Definition InstrProf.h:115
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition STLExtras.h:2348
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:281
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:269
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:319
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:361
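A few worked values for the helpers above (plain integer arguments, for illustration):
   bool IsP2      = isPowerOf2_64(64); // true
   unsigned ShAmt = Log2_64(64);       // 6
   uint64_t Ceil  = PowerOf2Ceil(20);  // 32, the next power of two at or above 20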
bool isReleaseOrStronger(AtomicOrdering AO)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1943
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:110
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:313
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:264
T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:65
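A one-line illustration (the 12-bit width is an arbitrary choice):
   uint64_t LowMask = maskTrailingOnes<uint64_t>(12); // 0xFFF: the 12 low bits set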
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:246
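Two worked values, for illustration:
   bool A = isMask_64(0xFF); // true:  contiguous ones starting at bit 0
   bool B = isMask_64(0xF0); // false: the run of ones does not start at bit 0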
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:174
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1936
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1894
FunctionAddr Next
Definition InstrProf.h:117
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:449
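A worked value, using a 12-bit field width (arbitrary, for illustration):
   int64_t Imm = SignExtend64<12>(0x800); // -2048: bit 11 is the sign bit of the field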
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
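A sketch of the mask this produces (argument names spelled out only for clarity):
   // {0, 1, 2, 3}: the identity mask for a 4-element shuffle, with no trailing undefs.
   SmallVector<int, 16> Mask = createSequentialMask(/*Start=*/0, /*NumInts=*/4,
                                                    /*NumUndefs=*/0);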
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:190
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:373
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:129
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:267
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:283
ElementCount getVectorElementCount() const
Definition ValueTypes.h:333
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:351
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:363
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:299
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:359
bool isFixedLengthVector() const
Definition ValueTypes.h:170
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:160
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:306
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:166
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:311
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:149
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type, which is chosen by the caller.
Definition ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:319
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:291
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:144
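Taken together, the EVT queries above cover most of what a lowering pass asks about a type. A minimal sketch, assuming an LLVMContext is available; the concrete types and widths are chosen purely for illustration:

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

static void evtSketch(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  // A fixed-length vector of 4 x i32.
  EVT VecVT = EVT::getVectorVT(Ctx, MVT::i32, 4);
  assert(VecVT.isVector() && VecVT.isFixedLengthVector() &&
         !VecVT.isScalableVector());
  assert(VecVT.getVectorNumElements() == 4 && VecVT.getScalarSizeInBits() == 32);
  assert(VecVT.getFixedSizeInBits() == 128);

  // Same element count, wider elements: the result compares as "bigger".
  EVT WideVT = VecVT.changeVectorElementType(MVT::i64);
  assert(WideVT.bitsGT(VecVT) && VecVT.bitsLT(WideVT) && VecVT.bitsLE(WideVT));

  // An odd-width integer is a legitimate EVT but has no simple MVT behind it.
  EVT I36 = EVT::getIntegerVT(Ctx, 36);
  assert(I36.isInteger() && I36.isScalarInteger() && !I36.isSimple());
}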
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:265
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:152
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:40
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:66
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:302
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:171
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:271
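The KnownBits operations listed above compose without any DAG context. A minimal, self-contained sketch using illustrative values:

#include "llvm/Support/KnownBits.h"
#include <cassert>

static void knownBitsSketch() {
  using namespace llvm;
  KnownBits K(8);
  assert(K.isUnknown() && K.getBitWidth() == 8);

  // Pin the value to exactly 12 (0b00001100) by setting the One/Zero masks.
  K.One = APInt(8, 12);
  K.Zero = ~K.One;
  assert(K.countMaxTrailingZeros() == 2); // Bit 2 is known set.
  assert(K.countMaxLeadingZeros() == 4);  // Bits 4..7 are known clear.

  // Truncation keeps the low bits; intersection with an unknown value
  // discards everything that is not known on both sides.
  assert(K.trunc(4).getBitWidth() == 4);
  assert(K.intersectWith(KnownBits(8)).isUnknown());
}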
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
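A sketch of how the MachinePointerInfo factories above are typically combined, assuming a MachineFunction and a frame index are in scope; the function name and the 8-byte offset are illustrative:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

// Describe the memory location eight bytes into a fixed stack slot, e.g. the
// high half of a spilled 128-bit value.
static llvm::MachinePointerInfo highHalfOfSlot(llvm::MachineFunction &MF,
                                               int FI) {
  llvm::MachinePointerInfo Base = llvm::MachinePointerInfo::getFixedStack(MF, FI);
  return Base.getWithOffset(8);
}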
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:141
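MaybeAlign differs from Align only in admitting an unset state; valueOrOne is the usual way to collapse that back to a conservative Align. A one-line sketch with an illustrative helper name:

#include "llvm/Support/Alignment.h"

// An unset MaybeAlign conservatively becomes an alignment of one byte.
static llvm::Align normalizeAlign(llvm::MaybeAlign MA) { return MA.valueOrOne(); }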
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
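The AddrMode struct above is what targets inspect in isLegalAddressingMode-style queries. A minimal sketch of building one for a [base register + 4] access, assuming a TargetLowering, DataLayout, and LLVMContext are available; everything beyond the struct fields and the query itself is illustrative:

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

static bool isRegPlusFourLegal(const llvm::TargetLowering &TLI,
                               const llvm::DataLayout &DL,
                               llvm::LLVMContext &Ctx) {
  llvm::TargetLowering::AddrMode AM;
  AM.BaseGV = nullptr;  // No global base.
  AM.BaseOffs = 4;      // Constant displacement of 4 bytes.
  AM.HasBaseReg = true; // One base register.
  AM.Scale = 0;         // No scaled index register.
  return TLI.isLegalAddressingMode(DL, AM, llvm::Type::getInt32Ty(Ctx),
                                   /*AddrSpace=*/0);
}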
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...